In [1]:
import torch
import numpy as np

# Example architecture (illustrative model definition)
class SimpleClassifier(torch.nn.Module):
    """Two-layer MLP head over four concatenated feature vectors.

    The first linear layer has ``proj_dim * 4`` input columns, laid out as
    (img1, txt1, img2, txt2); the head emits one value per input row.

    Args:
        proj_dim: Width of each of the four feature segments.
    """

    def __init__(self, proj_dim=512):
        super().__init__()
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(proj_dim * 4, 512),  # (img1, txt1, img2, txt2)
            torch.nn.ReLU(),
            torch.nn.Linear(512, 1)
        )

    def forward(self, image1_feat, text1_feat, image2_feat, text2_feat):
        """Concatenate the four feature tensors along dim 1 and run the head.

        Args:
            image1_feat, text1_feat, image2_feat, text2_feat: Tensors whose
                sizes along dim 1 add up to ``proj_dim * 4``.

        Returns:
            The output of ``self.classifier`` on the concatenated features.
        """
        x = torch.cat([image1_feat, text1_feat, image2_feat, text2_feat], dim=1)
        return self.classifier(x)

# Instantiate the model and extract the first layer's weight matrix.
# NOTE: no checkpoint is loaded and no seed is set in this cell, so these are
# the layer's random initial values and will differ from run to run.
proj_dim = 512
model = SimpleClassifier(proj_dim=proj_dim)
first_linear_layer = model.classifier[0]
# detach() instead of the legacy .data attribute: same values, outside autograd.
weight = first_linear_layer.weight.detach().cpu().numpy()  # (512, 2048)
# The four slices below only cover the matrix if its width is 4 * proj_dim.
assert weight.shape[1] == proj_dim * 4, weight.shape

# Split the input columns by segment: image1, text1, image2, text2
img1_weight = weight[:, 0 : proj_dim]
txt1_weight = weight[:, proj_dim : proj_dim * 2]
img2_weight = weight[:, proj_dim * 2 : proj_dim * 3]
txt2_weight = weight[:, proj_dim * 3 : proj_dim * 4]

# Average importance, measured as the mean L2 norm
def mean_l2_norm(w):
    """Return the mean of the L2 norms of ``w`` taken along axis 1.

    Args:
        w: Array-like with at least two dimensions.

    Returns:
        The mean of ``np.linalg.norm(w, axis=1)``.
    """
    row_norms = np.linalg.norm(w, axis=1)
    return row_norms.mean()

# Mean norm of each segment, keyed by the label printed next to it.
labelled_segments = (
    ("Image1 Weight Norm", img1_weight),
    ("Text1  Weight Norm", txt1_weight),
    ("Image2 Weight Norm", img2_weight),
    ("Text2  Weight Norm", txt2_weight),
)
results = {label: mean_l2_norm(segment) for label, segment in labelled_segments}

# Report
print("=== Modality별 Weight 영향도 (1st Linear Layer 기준) ===")
for label, value in results.items():
    print(f"{label}: {value:.4f}")

=== Modality별 Weight 영향도 (1st Linear Layer 기준) ===
Image1 Weight Norm: 0.2881
Text1  Weight Norm: 0.2892
Image2 Weight Norm: 0.2886
Text2  Weight Norm: 0.2886


In [4]:
import torch
import numpy as np
from transformers import CLIPModel

# 1. Load the pretrained CLIP model
clip_checkpoint = "openai/clip-vit-base-patch32"
clip = CLIPModel.from_pretrained(clip_checkpoint)

# 2. Full model definition
class FullModel(torch.nn.Module):
    """A CLIP module plus an MLP head over four concatenated feature vectors.

    ``clip_model`` is stored as the ``clip`` attribute; ``forward`` does not
    call it and only runs the head on the features it is given.

    Args:
        clip_model: Module stored as ``self.clip``.
        proj_dim: Width of each of the four feature segments; the head's
            first layer takes ``proj_dim * 4`` inputs.
    """

    def __init__(self, clip_model, proj_dim=512):
        super().__init__()
        self.clip = clip_model
        head_layers = [
            torch.nn.Linear(proj_dim * 4, 512),
            torch.nn.ReLU(),
            torch.nn.Linear(512, 1),
        ]
        self.classifier = torch.nn.Sequential(*head_layers)

    def forward(self, image1_feat, text1_feat, image2_feat, text2_feat):
        """Concatenate the four feature tensors along dim 1 and run the head."""
        features = (image1_feat, text1_feat, image2_feat, text2_feat)
        return self.classifier(torch.cat(features, dim=1))

# 3. Build the model and load the trained weights
proj_dim = 512
model = FullModel(clip_model=clip, proj_dim=proj_dim)

# ⚠️ Load the saved model weights (FullModel layout)
# NOTE(review): absolute, machine-specific path; this cell only runs where
# this file exists.
model_path = r"D:\Project\PJT_10\saved_models\clip_pair_best_epoch2_20250717_001936.pth"
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered .pth file cannot execute arbitrary code while being loaded.
state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
# Strict by default: raises if the checkpoint keys do not match the model's.
model.load_state_dict(state_dict)

# 4. Analyse the weights of the classifier's first Linear layer
first_linear = model.classifier[0]  # Linear(2048 -> 512)
# detach() instead of the legacy .data attribute: same values, outside autograd.
weight = first_linear.weight.detach().cpu().numpy()
# The four slices below only cover the matrix if its width is 4 * proj_dim.
assert weight.shape[1] == proj_dim * 4, weight.shape

# 5. Split the weight columns by segment: image1, text1, image2, text2
img1_weight = weight[:, 0 : proj_dim]
txt1_weight = weight[:, proj_dim : proj_dim * 2]
img2_weight = weight[:, proj_dim * 2 : proj_dim * 3]
txt2_weight = weight[:, proj_dim * 3 : proj_dim * 4]

# 6. Mean L2 norm
def mean_l2_norm(w):
    """Return the mean of the L2 norms of ``w`` taken along axis 1.

    Args:
        w: Array-like with at least two dimensions.

    Returns:
        The mean of ``np.linalg.norm(w, axis=1)``.
    """
    row_norms = np.linalg.norm(w, axis=1)
    return row_norms.mean()

# Mean norm of each segment, keyed by the label printed next to it.
labelled_segments = (
    ("Image1 Weight Norm", img1_weight),
    ("Text1  Weight Norm", txt1_weight),
    ("Image2 Weight Norm", img2_weight),
    ("Text2  Weight Norm", txt2_weight),
)
results = {label: mean_l2_norm(segment) for label, segment in labelled_segments}

# 7. Report
print("=== Modality별 Weight 영향도 (1st Linear Layer 기준) ===")
for label, value in results.items():
    print(f"{label}: {value:.4f}")

=== Modality별 Weight 영향도 (1st Linear Layer 기준) ===
Image1 Weight Norm: 0.2896
Text1  Weight Norm: 0.3023
Image2 Weight Norm: 0.2893
Text2  Weight Norm: 0.3016
