<a href="https://colab.research.google.com/github/MoqiSheng/MoqiSheng.github.io/blob/main/finetune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
!unzip /content/drive/MyDrive/UrbanASIFpr/Anchor.zip -d /content/anchor

Archive:  /content/drive/MyDrive/UrbanASIFpr/Anchor.zip
  inflating: /content/anchor/1014.png  
  inflating: /content/anchor/1027.png  
  inflating: /content/anchor/1049.png  
  inflating: /content/anchor/1052.png  
  inflating: /content/anchor/1075.png  
  inflating: /content/anchor/1080.png  
  inflating: /content/anchor/1081.png  
  inflating: /content/anchor/1094.png  
  inflating: /content/anchor/1102.png  
  inflating: /content/anchor/1108.png  
  inflating: /content/anchor/111.png  
  inflating: /content/anchor/1111.png  
  inflating: /content/anchor/1116.png  
  inflating: /content/anchor/1117.png  
  inflating: /content/anchor/1136.png  
  inflating: /content/anchor/1140.png  
  inflating: /content/anchor/1143.png  
  inflating: /content/anchor/1162.png  
  inflating: /content/anchor/1198.png  
  inflating: /content/anchor/1209.png  
  inflating: /content/anchor/1216.png  
  inflating: /content/anchor/1220.png  
  inflating: /content/anchor/1246.png  
  inflating: /content/anc

In [13]:
import torch
from transformers import ViTForImageClassification, ViTFeatureExtractor
import os
from PIL import Image
import numpy as np

# 定义设备
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_NAME = 'google/vit-base-patch16-224-in21k'  # 使用预训练的模型

# 加载模型
model = torch.load('vit_trained.pt')  # 直接加载已保存的完整模型
model = model.to(DEVICE)

# 加载图片并进行预处理
def load_and_preprocess_image(image_path, feature_extractor):
    image = Image.open(image_path).convert("RGB")
    inputs = feature_extractor(images=image, return_tensors="pt")
    return inputs['pixel_values'].to(DEVICE)

# 提取 cls_token
def extract_cls_token(model, image_tensor):
    model.eval()  # 切换到评估模式
    with torch.no_grad():
        outputs = model.vit.embeddings(image_tensor)  # 获取 ViT 的输入嵌入层输出
        cls_token = outputs[:, 0]  # 获取 cls_token（第一个位置）
    return cls_token

def main():
    # 加载特征提取器
    feature_extractor = ViTFeatureExtractor.from_pretrained(MODEL_NAME)

    # 获取./anchor目录中的所有图片，按数字顺序排序
    image_paths = sorted(
        [os.path.join('./anchor', f) for f in os.listdir('./anchor') if f.endswith('.jpg') or f.endswith('.png')],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])  # 依据文件名中的数字进行排序
    )

    # 用于保存所有的 cls_token
    all_cls_tokens = []

    # 对每一张图片提取 cls_token
    for image_path in image_paths:
        image_tensor = load_and_preprocess_image(image_path, feature_extractor)
        cls_token = extract_cls_token(model, image_tensor)

        # 打印或保存 cls_token
        print(f"Extracted cls_token from {image_path}:")
        print(cls_token)

        # 将当前图像的 cls_token 添加到列表中
        all_cls_tokens.append(cls_token)

        # 可选：将 cls_token 保存到文件
        save_path = image_path.replace('./anchor', './cls_tokens').replace('.jpg', '_cls_token.pt').replace('.png', '_cls_token.pt')
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        torch.save(cls_token, save_path)

    # 将所有的 cls_token 拼接成一个大的张量
    all_cls_tokens_tensor = torch.cat(all_cls_tokens, dim=0)  # 在第0维（批量维度）拼接

    # 保存拼接后的张量
    torch.save(all_cls_tokens_tensor, './anchor.pt')
    print("All cls_tokens have been saved to './anchor.pt'.")

    all_cls_tokens_tensor = torch.load('./anchor.pt')
    print(f"The shape of all_cls_tokens: {all_cls_tokens_tensor.shape}")

if __name__ == '__main__':
    main()


  model = torch.load('vit_trained.pt')  # 直接加载已保存的完整模型


[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
         -1.2056e-02,  1.1111e-04, -1.8976e-02,  2.2658e-02, -1.4062e-02,
          3.7792e-02, -3.7289e-03,  2.2731e-01,  1.2378e-02,  6.2517e-03,
          1.5747e-02,  1.5512e-02,  2.5807e-02, -5.0136e-02,  1.0310e-02,
          3.2665e-03,  1.3553e-02, -3.6155e-03, -1.2046e-01, -1.0120e-02,
          4.7080e-02,  2.5218e-02, -2.2356e-02,  8.4411e-04,  1.3601e-02,
         -1.1087e-02, -3.9714e-05, -5.4450e-02,  8.0091e-03, -1.0219e-02,
          3.8926e-03,  2.4002e-03, -1.4441e-01,  2.9921e-01, -2.3563e-02,
          2.3284e-03,  1.4818e-02,  1.3586e-02, -1.3852e-02, -3.1480e-03,
         -3.6436e-01,  1.0347e-02, -1.1670e-01,  2.3083e-02,  1.8193e-02,
          1.2108e-02, -2.1990e-01, -2.9395e-01, -1.0154e+00,  4.4213e-02,
          3.6092e-03, -1.2163e-02,  4.6916e-02, -6.5579e-03,  8.7237e-02,
         -2.8664e-02, -3.8015e-02,  4.8130e-01,  1.0921e-01, -1.0675e-02,
          1.5280e-02, -2.1866e-02, -1.4599e-02,  1.4273e-01, -3.3619e-0

  all_cls_tokens_tensor = torch.load('./anchor.pt')


In [14]:
!unzip /content/drive/MyDrive/UrbanASIFpr/Predict.zip -d /content/predict

Archive:  /content/drive/MyDrive/UrbanASIFpr/Predict.zip
  inflating: /content/predict/0.png  
  inflating: /content/predict/1.png  
  inflating: /content/predict/10.png  
  inflating: /content/predict/100.png  
  inflating: /content/predict/1000.png  
  inflating: /content/predict/1001.png  
  inflating: /content/predict/1002.png  
  inflating: /content/predict/1003.png  
  inflating: /content/predict/1004.png  
  inflating: /content/predict/1005.png  
  inflating: /content/predict/1006.png  
  inflating: /content/predict/1007.png  
  inflating: /content/predict/1008.png  
  inflating: /content/predict/1009.png  
  inflating: /content/predict/101.png  
  inflating: /content/predict/1010.png  
  inflating: /content/predict/1011.png  
  inflating: /content/predict/1012.png  
  inflating: /content/predict/1013.png  
  inflating: /content/predict/1015.png  
  inflating: /content/predict/1016.png  
  inflating: /content/predict/1017.png  
  inflating: /content/predict/1018.png  
  inflatin

In [19]:
import torch
from transformers import ViTForImageClassification, ViTFeatureExtractor
import os
from PIL import Image
import numpy as np

# 定义设备
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_NAME = 'google/vit-base-patch16-224-in21k'  # 使用预训练的模型

# 加载模型
model = torch.load('vit_trained.pt')  # 直接加载已保存的完整模型
model = model.to(DEVICE)

# 加载图片并进行预处理
def load_and_preprocess_image(image_path, feature_extractor):
    image = Image.open(image_path).convert("RGB")
    inputs = feature_extractor(images=image, return_tensors="pt")
    return inputs['pixel_values'].to(DEVICE)

# 提取 cls_token
def extract_cls_token(model, image_tensor):
    model.eval()  # 切换到评估模式
    with torch.no_grad():
        outputs = model.vit.embeddings(image_tensor)  # 获取 ViT 的输入嵌入层输出
        cls_token = outputs[:, 0]  # 获取 cls_token（第一个位置）
    return cls_token

def main():
    # 加载特征提取器
    feature_extractor = ViTFeatureExtractor.from_pretrained(MODEL_NAME)

    # 获取./anchor目录中的所有图片，按数字顺序排序
    image_paths = sorted(
        [os.path.join('./predict', f) for f in os.listdir('./predict') if f.endswith('.jpg') or f.endswith('.png')],
        key=lambda x: int(os.path.splitext(os.path.basename(x))[0])  # 依据文件名中的数字进行排序
    )

    # 用于保存所有的 cls_token
    all_cls_tokens = []

    # 对每一张图片提取 cls_token
    for image_path in image_paths:
        image_tensor = load_and_preprocess_image(image_path, feature_extractor)
        cls_token = extract_cls_token(model, image_tensor)

        # 打印或保存 cls_token
        # print(f"Extracted cls_token from {image_path}:")
        # print(cls_token)

        # 将当前图像的 cls_token 添加到列表中
        all_cls_tokens.append(cls_token)

        # 可选：将 cls_token 保存到文件
        save_path = image_path.replace('./predict', './cls_tokens').replace('.jpg', '_cls_token.pt').replace('.png', '_cls_token.pt')
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        torch.save(cls_token, save_path)

    # 将所有的 cls_token 拼接成一个大的张量
    all_cls_tokens_tensor = torch.cat(all_cls_tokens, dim=0)  # 在第0维（批量维度）拼接

    # 保存拼接后的张量
    torch.save(all_cls_tokens_tensor, './predict.pt')
    print("All cls_tokens have been saved to './predict.pt'.")

    all_cls_tokens_tensor = torch.load('./predict.pt')
    print(f"The shape of all_cls_tokens: {all_cls_tokens_tensor.shape}")

if __name__ == '__main__':
    main()

  model = torch.load('vit_trained.pt')  # 直接加载已保存的完整模型


All cls_tokens have been saved to './predict.pt'.
The shape of all_cls_tokens: torch.Size([1366, 768])


  all_cls_tokens_tensor = torch.load('./predict.pt')


In [10]:
!rm -r ./anchor