<a href="https://colab.research.google.com/github/LianBiao123/Myself-LB/blob/master/compare.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision



In [5]:
!pip install git+https://github.com/openai/CLIP.git
!pip install ftfy regex tqdm

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-5pmii5g7
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-5pmii5g7
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369490 sha256=83e5a2ccebc95b17634cf6b95d2a029500592598f0ad47cc592be47398913b9f
  Stored in directory: /tmp/pip-ephem-wheel-cache-iq0m81ar/wheels/35/3e/df/3d24cbfb3b6a06f17

In [6]:
import torch
import torchvision
from torchvision import transforms
from PIL import Image
import numpy as np
import clip
import requests
from io import BytesIO

# Pick the compute device: use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [7]:
# Load the CLIP ViT-B/32 model together with the preprocessing pipeline returned by clip.load
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)

# Load ResNet50 with the IMAGENET1K_V1 weights, move it to the device and switch to eval mode
resnet_model = torchvision.models.resnet50(
    weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1
).to(device)
resnet_model.eval()

# ResNet50 preprocessing: resize, center-crop to 224, convert to tensor, normalize per channel
resnet_preprocess = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

100%|███████████████████████████████████████| 338M/338M [00:06<00:00, 58.2MiB/s]


In [14]:
# Download the ImageNet class-name file into the working directory
import urllib.request

urllib.request.urlretrieve(
    url="https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt",
    filename="imagenet_classes.txt",
)

# Read the class names, one per line, trimming surrounding whitespace from each
with open("imagenet_classes.txt", "r") as label_file:
    imagenet_classes = [line.strip() for line in label_file]

In [21]:
def _print_hypothesis_verdict(group_name, clip_best_prob, resnet_best_prob):
    """
    Print the pass/fail line for the hypothesis attached to an experiment group.

    The group is recognised by substring match on ``group_name``. A name that
    matches none of the four patterns prints nothing.

    Parameters:
    - group_name: experiment group name
    - clip_best_prob: probability of CLIP's best prompt
    - resnet_best_prob: highest ResNet50 probability among the mapped prompts
    """
    if "已知领域" in group_name or "组1" in group_name:
        # Group 1: both models are expected to be confident
        if clip_best_prob > 0.7 and resnet_best_prob > 0.7:
            print("✓ 验证结果: 在已知领域CLIP能达到CNN水平")
        else:
            print("✗ 验证结果: 在已知领域CLIP未能达到CNN水平")

    elif "未训练物体" in group_name or "组2" in group_name:
        # Group 2: ResNet50 is expected to assign low probability
        if resnet_best_prob < 0.1:
            print("✓ 验证结果: CNN对未训练物体直接失效")
        else:
            print("✗ 验证结果: CNN对未训练物体仍有一定识别能力")

    elif "抽象概念" in group_name or "组3" in group_name:
        # Group 3: stricter threshold than groups 2 and 4
        if resnet_best_prob < 0.05:
            print("✓ 验证结果: CNN完全无法处理抽象概念")
        else:
            print("✗ 验证结果: CNN对抽象概念有一定识别能力")

    elif "跨域数据" in group_name or "组4" in group_name:
        # Group 4: ResNet50 is expected to assign low probability
        if resnet_best_prob < 0.1:
            print("✓ 验证结果: CNN对跨域数据鲁棒性差")
        else:
            print("✗ 验证结果: CNN对跨域数据有一定鲁棒性")


def compare_models_for_group(image_path, group_name, prompts_zh, prompts_en, imagenet_mapping):
    """
    Compare CLIP and ResNet50 on one experiment group and print a report.

    Relies on the module-level ``clip_model``, ``clip_preprocess``,
    ``resnet_model``, ``resnet_preprocess``, ``imagenet_classes`` and ``device``.

    Parameters:
    - image_path: path of the image to classify; if it cannot be loaded, a solid
      red 224x224 placeholder image is used so the demo can continue
    - group_name: experiment group name (also selects the hypothesis check)
    - prompts_zh: Chinese prompts, used as display labels
    - prompts_en: English prompts fed to CLIP, parallel to ``prompts_zh``
    - imagenet_mapping: dict from English prompt to an ImageNet class name;
      prompts missing from the dict are looked up under their own text

    Returns:
    A dict with keys
    - "group": the group name
    - "clip_best": (Chinese prompt, probability) of CLIP's top prompt
    - "resnet_best": (Chinese prompt, probability) of the prompt whose mapped
      ImageNet class received the highest ResNet50 probability
    - "resnet_top5": list of (class name, probability) for ResNet50's top 5
    """
    print(f"\n{'='*60}")
    print(f"实验组: {group_name}")
    print(f"{'='*60}")

    # Load the image. It is converted to RGB so that RGBA / palette / grayscale
    # files produce the 3-channel tensor that the ResNet50 normalization expects,
    # and the file handle is closed as soon as the pixels have been read.
    try:
        with Image.open(image_path) as opened_image:
            image = opened_image.convert("RGB")
        print(f"成功加载图像: {image_path}")
    except Exception as e:
        print(f"无法加载图像: {image_path}, 错误: {e}")
        # Fall back to a placeholder image so the demonstration can continue
        image = Image.new('RGB', (224, 224), color='red')
        print("使用占位图像继续演示")

    # ---- 1. CLIP prediction over the supplied prompts ----
    print(f"\n1. CLIP模型预测 (提示词: {prompts_zh})")
    print("-" * 50)

    text = clip.tokenize(prompts_en).to(device)
    clip_input = clip_preprocess(image).unsqueeze(0).to(device)

    with torch.no_grad():
        logits_per_image, _ = clip_model(clip_input, text)
        # Softmax over the prompts; [0] selects the single image in the batch
        clip_probs = logits_per_image.softmax(dim=-1).cpu().numpy()[0]

    # Pair each Chinese label with the probability of its English prompt
    clip_predictions = []
    for i, prompt in enumerate(prompts_zh):
        clip_predictions.append((prompt, clip_probs[i]))
        print(f"{prompt}: {clip_probs[i]:.4f}")

    clip_best = max(clip_predictions, key=lambda x: x[1])
    print(f"CLIP最佳预测: '{clip_best[0]}' (概率: {clip_best[1]:.4f})")

    # ---- 2. ResNet50 prediction over the ImageNet classes ----
    print(f"\n2. ResNet50模型预测")
    print("-" * 50)

    resnet_input = resnet_preprocess(image).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = resnet_model(resnet_input)
        probs = torch.nn.functional.softmax(outputs[0], dim=0)

    top5_probs, top5_indices = torch.topk(probs, 5)

    print("ResNet50 Top-5预测:")
    resnet_top5 = []
    # .tolist() yields plain ints/floats, so the class list is indexed with an int
    for rank, (class_idx, probability) in enumerate(
        zip(top5_indices.tolist(), top5_probs.tolist()), start=1
    ):
        class_name = imagenet_classes[class_idx]
        resnet_top5.append((class_name, probability))
        print(f"  {rank}. {class_name}: {probability:.4f}")

    # ---- 3. ResNet50 probability of the class each prompt is mapped to ----
    print(f"\n3. 提示词在ResNet50中的对应概率")
    print("-" * 50)

    resnet_prompt_probs = []
    for prompt_zh, prompt_en in zip(prompts_zh, prompts_en):
        mapped_label = imagenet_mapping.get(prompt_en, prompt_en)
        if mapped_label in imagenet_classes:
            idx = imagenet_classes.index(mapped_label)
            probability = probs[idx].item()
            resnet_prompt_probs.append((prompt_zh, probability))
            print(f"{prompt_zh} -> {mapped_label}: {probability:.4f}")
        else:
            # No matching ImageNet class name: record a probability of zero
            resnet_prompt_probs.append((prompt_zh, 0.0))
            print(f"{prompt_zh}: 在ImageNet中无对应类别")

    # ---- 4. Side-by-side comparison ----
    print(f"\n4. 模型对比分析")
    print("-" * 50)

    clip_best_prompt, clip_best_prob = clip_best

    if resnet_prompt_probs:
        # The best mapped prompt is computed once and is also what gets returned
        resnet_best = max(resnet_prompt_probs, key=lambda x: x[1])
        resnet_best_prompt, resnet_best_prob = resnet_best

        print(f"CLIP最佳预测: '{clip_best_prompt}' (概率: {clip_best_prob:.4f})")
        print(f"ResNet50对应最佳: '{resnet_best_prompt}' (概率: {resnet_best_prob:.4f})")

        if resnet_best_prob > 0.01:  # ResNet50 gave a non-negligible probability
            if clip_best_prompt == resnet_best_prompt:
                print("✓ 两个模型预测一致")
            else:
                print("✗ 两个模型预测不一致")

            conf_diff = abs(clip_best_prob - resnet_best_prob)
            print(f"置信度差异: {conf_diff:.4f}")
        else:
            print("ResNet50无法有效识别该类别")

        _print_hypothesis_verdict(group_name, clip_best_prob, resnet_best_prob)
    else:
        resnet_best = ("未知", 0.0)
        print("ResNet50无法识别任何指定类别")

    return {
        "group": group_name,
        "clip_best": clip_best,
        "resnet_best": resnet_best,
        "resnet_top5": resnet_top5
    }

In [23]:
def _performance_label(probability):
    """Map a best-prediction probability to the rating printed in the summary."""
    if probability > 0.5:
        return "良好"
    if probability > 0.1:
        return "一般"
    return "较差"


# Main function - runs the four experiment groups
def run_all_experiments():
    """
    Run the four CLIP vs ResNet50 comparison experiments and print a summary.

    Each experiment is passed to ``compare_models_for_group``; afterwards the
    best CLIP and ResNet50 results of every group are printed together with a
    rating for each model.

    Returns:
    The list of result dicts returned by ``compare_models_for_group``, one per
    experiment, in execution order.
    """
    print("CLIP vs ResNet50 四组对比实验")
    print("基于实验设计表格的完整对比")
    print("=" * 60)

    # Parameters of the four experiment groups
    experiments = [
        {
            "name": "组1 - ResNet见过的类别",
            "image_path": "/golden_retriever.jpg",  # replace with the actual image path
            "prompts_zh": ["金毛犬", "波斯猫", "客机"],
            "prompts_en": ["golden retriever", "Persian cat", "airliner"],
            "imagenet_mapping": {
                "golden retriever": "golden retriever",
                "Persian cat": "Persian cat",
                "airliner": "airliner"
            }
        },
        {
            "name": "组2 - ResNet未见的物体",
            "image_path": "/red_envelope.jpg",  # replace with the actual image path
            "prompts_zh": ["红包", "二维码", "麻将牌"],
            "prompts_en": ["red envelope", "QR code", "mahjong tile"],
            "imagenet_mapping": {
                "red envelope": "piggy bank",  # approximate mapping
                "QR code": "modem",  # approximate mapping
                "mahjong tile": "domino"  # approximate mapping
            }
        },
        {
            "name": "组3 - ResNet未见抽象概念",
            "image_path": "/new_year_atmosphere.jpg",  # replace with the actual image path
            "prompts_zh": ["喜庆氛围", "商业促销", "年味"],
            "prompts_en": ["festive atmosphere", "commercial promotion", "new year atmosphere"],
            "imagenet_mapping": {
                "festive atmosphere": "fireworks",  # approximate mapping
                "commercial promotion": "shop",  # approximate mapping
                "new year atmosphere": "lantern"  # approximate mapping
            }
        },
        {
            "name": "组4 - 跨域数据（漫画/水彩）",
            "image_path": "/cartoon_red_envelope.png",  # replace with the actual image path
            "prompts_zh": ["漫画红包", "水彩春节", "卡通年兽"],
            "prompts_en": ["comic red envelope", "watercolor spring festival", "cartoon new year beast"],
            "imagenet_mapping": {
                "comic red envelope": "comic book",  # approximate mapping
                "watercolor spring festival": "watercolor",  # approximate mapping
                "cartoon new year beast": "cartoon"  # approximate mapping
            }
        }
    ]

    # Run every experiment and keep its result dict
    results = [
        compare_models_for_group(
            exp["image_path"],
            exp["name"],
            exp["prompts_zh"],
            exp["prompts_en"],
            exp["imagenet_mapping"]
        )
        for exp in experiments
    ]

    # Summary of all experiments
    print(f"\n{'='*60}")
    print("实验总结")
    print(f"{'='*60}")

    for result in results:
        group = result["group"]
        clip_best = result["clip_best"]
        resnet_best = result["resnet_best"]

        print(f"\n{group}:")
        print(f"  CLIP最佳预测: {clip_best[0]} (概率: {clip_best[1]:.4f})")
        print(f"  ResNet50最佳对应: {resnet_best[0]} (概率: {resnet_best[1]:.4f})")

        # Rate each model from the probability of its best prediction
        print(f"  ResNet50表现: {_performance_label(resnet_best[1])}")
        print(f"  CLIP表现: {_performance_label(clip_best[1])}")

    # Hand the raw results back so callers can analyse them further
    return results
# Entry point: run all four experiments when this code executes as the top-level module
if __name__ == "__main__":
    run_all_experiments()

CLIP vs ResNet50 四组对比实验
基于实验设计表格的完整对比

实验组: 组1 - ResNet见过的类别
成功加载图像: /golden_retriever.jpg

1. CLIP模型预测 (提示词: ['金毛犬', '波斯猫', '客机'])
--------------------------------------------------
金毛犬: 1.0000
波斯猫: 0.0000
客机: 0.0000
CLIP最佳预测: '金毛犬' (概率: 1.0000)

2. ResNet50模型预测
--------------------------------------------------
ResNet50 Top-5预测:
  1. golden retriever: 0.9110
  2. Labrador retriever: 0.0169
  3. Pembroke: 0.0070
  4. tennis ball: 0.0039
  5. Leonberg: 0.0038

3. 提示词在ResNet50中的对应概率
--------------------------------------------------
金毛犬 -> golden retriever: 0.9110
波斯猫 -> Persian cat: 0.0000
客机 -> airliner: 0.0000

4. 模型对比分析
--------------------------------------------------
CLIP最佳预测: '金毛犬' (概率: 1.0000)
ResNet50对应最佳: '金毛犬' (概率: 0.9110)
✓ 两个模型预测一致
置信度差异: 0.0890
✓ 验证结果: 在已知领域CLIP能达到CNN水平

实验组: 组2 - ResNet未见的物体
成功加载图像: /red_envelope.jpg

1. CLIP模型预测 (提示词: ['红包', '二维码', '麻将牌'])
--------------------------------------------------
红包: 1.0000
二维码: 0.0000
麻将牌: 0.0000
CLIP最佳预测: '红包' (概率: 1.0000)

