<a href="https://colab.research.google.com/github/MoqiSheng/MoqiSheng.github.io/blob/main/classification_similarity1228.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!unzip /content/drive/MyDrive/CommonFeatures/correct_function.zip -d /content/Data

Archive:  /content/drive/MyDrive/CommonFeatures/correct_function.zip
   creating: /content/Data/civic, governmental and cultural/
  inflating: /content/Data/civic, governmental and cultural/0.png  
  inflating: /content/Data/civic, governmental and cultural/1035.png  
  inflating: /content/Data/civic, governmental and cultural/1056.png  
  inflating: /content/Data/civic, governmental and cultural/1219.png  
  inflating: /content/Data/civic, governmental and cultural/1246.png  
  inflating: /content/Data/civic, governmental and cultural/1266.png  
  inflating: /content/Data/civic, governmental and cultural/1272.png  
  inflating: /content/Data/civic, governmental and cultural/1286.png  
  inflating: /content/Data/civic, governmental and cultural/1287.png  
  inflating: /content/Data/civic, governmental and cultural/1288.png  
  inflating: /content/Data/civic, governmental and cultural/1289.png  
  inflating: /content/Data/civic, governmental and cultural/1290.png  
  inflating: /content

In [4]:
import os
import torch
from transformers import AutoFeatureExtractor, AutoModel
from PIL import Image
from tqdm import tqdm
import pandas as pd

# 设置设备为 GPU（如果可用）
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 加载预训练的 facebook/vit-large-patch14-336 模型和特征提取器
model_name = "facebook/vit-large-patch14-336"
print(f"Loading model and feature extractor for {model_name}...")
model = AutoModel.from_pretrained(model_name).to(device)
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
model.eval()  # 设置模型为评估模式

# 定义特征提取函数
def extract_features(image_folder, output_file):
    image_features_list = []
    image_filenames = []

    # 获取并按文件名中的数字顺序排序图像路径
    image_paths = sorted(
        [
            os.path.join(image_folder, img)
            for img in os.listdir(image_folder)
            if img.lower().endswith(('.jpg', '.png'))
        ],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
    )

    print(f"Found {len(image_paths)} images in {image_folder}.")

    # 处理每一张图像
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # 加载并确保图像为 RGB 格式
            image = Image.open(image_path).convert("RGB")

            # 使用特征提取器进行预处理
            inputs = feature_extractor(images=image, return_tensors="pt").to(device)

            # 禁用梯度计算，提高效率
            with torch.no_grad():
                outputs = model(**inputs)

                # 提取 [CLS] token 的特征
                image_features = outputs.last_hidden_state[:, 0]

                # 归一化特征向量
                image_features /= image_features.norm(dim=-1, keepdim=True)

                # 将特征移动到 CPU 并添加到列表中
                image_features_list.append(image_features.cpu())
                image_filenames.append(os.path.basename(image_path))

        except Exception as e:
            print(f"处理 {image_path} 时出错: {e}")

    # 将所有特征拼接成一个大的张量
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)
        torch.save(image_features, output_file)
        print(f"特征已保存到 {output_file}")
    else:
        print(f"在 {image_folder} 中未找到有效的图像")
        return None, None

    return image_features, image_filenames

# 定义计算余弦相似度并保存为 CSV 的函数
def compute_and_save_cosine_similarity(features, filenames, csv_output):
    if features is None or filenames is None:
        print("特征或文件名为空，无法计算余弦相似度。")
        return

    print("计算余弦相似度矩阵...")

    # 确保特征已经归一化
    # 余弦相似度可以通过特征矩阵与其转置的点积计算
    similarity_matrix = torch.mm(features, features.t()).numpy()

    print(f"相似度矩阵大小: {similarity_matrix.shape}")

    # 将相似度矩阵转换为 DataFrame
    df = pd.DataFrame(similarity_matrix, index=filenames, columns=filenames)

    # 保存为 CSV 文件
    df.to_csv(csv_output)
    print(f"余弦相似度矩阵已保存到 {csv_output}")

# 主流程
if __name__ == "__main__":
    # 定义图像文件夹和输出文件
    image_folder = './Data/civic, governmental and cultural'
    feature_output = './imgs_vit_1.pt'
    csv_output = './cosine_similarity_matrix.csv'

    # 提取特征
    features, filenames = extract_features(image_folder, feature_output)

    # 计算余弦相似度并保存为 CSV
    compute_and_save_cosine_similarity(features, filenames, csv_output)


Using device: cuda
Loading model and feature extractor for facebook/vit-large-patch14-336...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


OSError: facebook/vit-large-patch14-336 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [6]:
 pip install git+https://github.com/openai/CLIP.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-nrekrqm2
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-nrekrqm2
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369489 sha256=ad5b9081a9e0a5b54eca64784712b5c0858dc11fd709abcaa95d2136434fcde4
  Stored in directory: /tmp/pip-ephem-wheel-cache-ah4bl8fd/wheels/da/2b/4c/d6691fa9597aac8bb

In [8]:
import os
import torch
import clip
from PIL import Image
from tqdm import tqdm
import pandas as pd

# 确保你已经安装了 CLIP 库。如果尚未安装，可以使用以下命令：
# pip install git+https://github.com/openai/CLIP.git

# 设置设备为 GPU（如果可用）
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# 加载预训练的 CLIP 模型和预处理函数
model_name = "ViT-L/14@336px"  # 确保该模型名称在 CLIP 库中可用
print(f"Loading CLIP model: {model_name}...")
model, preprocess = clip.load(model_name, device=device)
model.eval()  # 设置模型为评估模式
print("CLIP 模型加载完成。")

# 定义特征提取函数
def extract_features(image_folder, output_file):
    image_features_list = []
    image_filenames = []

    # 获取并按文件名中的数字顺序排序图像路径
    image_paths = sorted(
        [
            os.path.join(image_folder, img)
            for img in os.listdir(image_folder)
            if img.lower().endswith(('.jpg', '.png'))
        ],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])
    )

    print(f"在文件夹 {image_folder} 中找到 {len(image_paths)} 张图像。")

    # 处理每一张图像
    for image_path in tqdm(image_paths, desc=f"Processing {image_folder}"):
        try:
            # 加载并预处理图像
            image = Image.open(image_path).convert("RGB")
            image_input = preprocess(image).unsqueeze(0).to(device)  # 添加批次维度并移动到设备

            # 禁用梯度计算，提高效率
            with torch.no_grad():
                image_features = model.encode_image(image_input)  # 编码图像
                image_features /= image_features.norm(dim=-1, keepdim=True)  # 归一化特征向量

            # 将特征移动到 CPU 并添加到列表中
            image_features_list.append(image_features.cpu())
            image_filenames.append(os.path.basename(image_path))

        except Exception as e:
            print(f"处理 {image_path} 时出错: {e}")

    # 将所有特征拼接成一个大的张量
    if image_features_list:
        image_features = torch.cat(image_features_list, dim=0)  # [num_images, feature_dim]
        torch.save(image_features, output_file)
        print(f"特征已保存到 {output_file}")
    else:
        print(f"在 {image_folder} 中未找到有效的图像")
        return None, None

    return image_features, image_filenames

# 定义计算余弦相似度并保存为 CSV 的函数
def compute_and_save_cosine_similarity(features, filenames, csv_output):
    if features is None or filenames is None:
        print("特征或文件名为空，无法计算余弦相似度。")
        return

    print("计算余弦相似度矩阵...")

    # 余弦相似度可以通过特征矩阵与其转置的点积计算（假设特征已经归一化）
    similarity_matrix = torch.mm(features, features.t()).numpy()  # [num_images, num_images]

    print(f"相似度矩阵大小: {similarity_matrix.shape}")

    # 将相似度矩阵转换为 DataFrame
    df = pd.DataFrame(similarity_matrix, index=filenames, columns=filenames)

    # 保存为 CSV 文件
    df.to_csv(csv_output)
    print(f"余弦相似度矩阵已保存到 {csv_output}")

# 主流程
if __name__ == "__main__":
    # 定义图像文件夹和输出文件路径
    image_folder = './Data/commercial'
    feature_output = './imgs_vit_2.pt'
    csv_output = './cosine_similarity_matrix_2.csv'

    # 确保输出目录存在
    os.makedirs(os.path.dirname(feature_output), exist_ok=True)
    os.makedirs(os.path.dirname(csv_output), exist_ok=True)

    # 提取特征
    features, filenames = extract_features(image_folder, feature_output)

    # 计算余弦相似度并保存为 CSV
    compute_and_save_cosine_similarity(features, filenames, csv_output)


Using device: cuda
Loading CLIP model: ViT-L/14@336px...
CLIP 模型加载完成。
在文件夹 ./Data/commercial 中找到 208 张图像。


Processing ./Data/commercial: 100%|██████████| 208/208 [00:05<00:00, 36.13it/s]


特征已保存到 ./imgs_vit_2.pt
计算余弦相似度矩阵...
相似度矩阵大小: (208, 208)
余弦相似度矩阵已保存到 ./cosine_similarity_matrix_2.csv
