In [1]:
import logging
import os
import pdb
import math
import glob
import random
import time
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from PIL import Image
from sklearn.preprocessing import LabelEncoder

from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split

import torchvision.transforms as T

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.nn.parameter import Parameter

from tqdm import tqdm
import sys

import copy
from scipy import spatial
import csv

from perceiver import crop, patchify, get_patch_coords, ImageDataset, PerceiverBlock, Perceiver, CustomDataset, CombinedModel

In [2]:
def seed_everything(seed):
    """Seed every random number generator this notebook uses.

    Args:
        seed: Integer seed handed to Python's ``random``, NumPy and PyTorch.

    Side effects:
        Switches cuDNN to deterministic mode and turns off its benchmark
        autotuner.
    """
    # Python's built-in generator and NumPy's global generator.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch: the CPU generator first, then the CUDA generators
    # (current device, followed by every device for multi-GPU runs).
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN: request deterministic kernels and disable benchmark autotuning.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(42)

In [3]:
# Directory holding the pickled per-task models (text_model_<n>.pkl / image_model_<n>.pkl).
# NOTE(review): absolute, user-specific path — the notebook only runs on a machine with this layout.
root_dir = '/home/youlee/perceiver/perceiver/model/'
# Directory holding the pickled validation loaders/datasets (text_val_loader_<n>.pkl / image_val_loader_<n>.pkl).
loader_dir = '/home/youlee/perceiver/perceiver/loader/'

# Batch size used when the image validation datasets are wrapped in a DataLoader.
batch_size = 32

In [4]:
# Loaded models; the text cell appends six entries first, then the image cell appends six more.
input_models = []
# Validation loaders, appended in the same order as input_models (so indices line up).
valid_loaders = []

### Text Modality:

In [5]:
# Load the six text models and their validation loaders (files on disk are numbered 1-6).
# NOTE: this cell appends to input_models / valid_loaders, so re-running it without
# re-running the cell that creates those lists duplicates the entries.
for i in range(6):
    # The model files store whole pickled modules, so full unpickling is required.
    # weights_only=False is passed explicitly: it silences the FutureWarning and keeps
    # the call working on PyTorch releases where the default is weights_only=True.
    # SECURITY: torch.load(weights_only=False) and pickle.load execute arbitrary code
    # from the file — only load artifacts you produced yourself.
    text_model = torch.load(root_dir + f'text_model_{i+1}.pkl', weights_only=False)
    input_models.append(text_model)
    print(f"Text model {i+1}번 불러오기 완료.")

    with open(loader_dir + f'text_val_loader_{i+1}.pkl', 'rb') as f:
        loaded_valid_dataset = pickle.load(f)
    # The unpickled object is appended as-is (no DataLoader wrapping, unlike the image cell).
    valid_loaders.append(loaded_valid_dataset)
    # Report the 1-based file number, matching the model message above.
    print(f"Text val. loader {i+1}번 불러오기 완료.")

  text_model = torch.load(root_dir + f'text_model_{i+1}.pkl')


Text model 1번 불러오기 완료.
Text val. loader 0번 불러오기 완료.
Text model 2번 불러오기 완료.
Text val. loader 1번 불러오기 완료.
Text model 3번 불러오기 완료.
Text val. loader 2번 불러오기 완료.
Text model 4번 불러오기 완료.
Text val. loader 3번 불러오기 완료.
Text model 5번 불러오기 완료.
Text val. loader 4번 불러오기 완료.
Text model 6번 불러오기 완료.
Text val. loader 5번 불러오기 완료.


### Image Modality:

In [6]:
# Load the six image models and build their validation loaders (files on disk are numbered 1-6).
# NOTE: this cell appends to input_models / valid_loaders, so re-running it without
# re-running the cell that creates those lists duplicates the entries.
for i in range(6):
    # The model files store whole pickled modules, so full unpickling is required.
    # weights_only=False is passed explicitly: it silences the FutureWarning and keeps
    # the call working on PyTorch releases where the default is weights_only=True.
    # SECURITY: torch.load(weights_only=False) and pickle.load execute arbitrary code
    # from the file — only load artifacts you produced yourself.
    img_model = torch.load(root_dir + f'image_model_{i+1}.pkl', weights_only=False)
    input_models.append(img_model)
    # Report the 1-based file number so the message matches the file that was read.
    print(f"Image model {i+1}번 불러오기 완료.")

    with open(loader_dir + f'image_val_loader_{i+1}.pkl', 'rb') as f:
        loaded_valid_dataset = pickle.load(f)

    # The image pickle is wrapped in a DataLoader here; shuffle=False keeps the
    # first batch identical every time it is fetched.
    valid_loader = DataLoader(loaded_valid_dataset, batch_size=batch_size, shuffle=False)
    valid_loaders.append(valid_loader)
    print(f"Image val. loader {i+1}번 불러오기 완료.")

Image model 0번 불러오기 완료.
Image val. loader 0번 불러오기 완료.


  img_model = torch.load(root_dir + f'image_model_{i+1}.pkl')


Image model 1번 불러오기 완료.
Image val. loader 1번 불러오기 완료.
Image model 2번 불러오기 완료.
Image val. loader 2번 불러오기 완료.
Image model 3번 불러오기 완료.
Image val. loader 3번 불러오기 완료.
Image model 4번 불러오기 완료.
Image val. loader 4번 불러오기 완료.
Image model 5번 불러오기 완료.
Image val. loader 5번 불러오기 완료.


In [7]:
# Display the accumulated loaders (one entry per model appended by the two loading cells).
valid_loaders  

[<torch.utils.data.dataloader.DataLoader at 0x7f2226794430>,
 <torch.utils.data.dataloader.DataLoader at 0x7f219afa2a40>,
 <torch.utils.data.dataloader.DataLoader at 0x7f20d5639660>,
 <torch.utils.data.dataloader.DataLoader at 0x7f20d563b3a0>,
 <torch.utils.data.dataloader.DataLoader at 0x7f2225305630>,
 <torch.utils.data.dataloader.DataLoader at 0x7f20d4dd1d50>,
 <torch.utils.data.dataloader.DataLoader at 0x7f20d53d7bb0>,
 <torch.utils.data.dataloader.DataLoader at 0x7f20d55fbdc0>,
 <torch.utils.data.dataloader.DataLoader at 0x7f20d4aeba90>,
 <torch.utils.data.dataloader.DataLoader at 0x7f20d4997dc0>,
 <torch.utils.data.dataloader.DataLoader at 0x7f20d4463ca0>,
 <torch.utils.data.dataloader.DataLoader at 0x7f20d41c8040>]

In [8]:
# Print the tensor shapes found in the first batch of every validation loader.

for idx, loader in enumerate(valid_loaders):
    print(f"--- Valid Loader {idx + 1} ---")

    batch = next(iter(loader))

    if isinstance(batch, dict):
        # Dict-style batch: show the shape stored under every key.
        print({key: value.shape for key, value in batch.items()})
        continue

    if isinstance(batch, (list, tuple)):
        # Sequence-style batch: element 0 is treated as inputs, element 1 as labels.
        print(f"Inputs shape: {batch[0].shape}, Labels shape: {batch[1].shape}")
        continue

    print(f"Unknown batch format: {type(batch)}")

--- Valid Loader 1 ---
{'input_ids': torch.Size([32, 128]), 'attention_mask': torch.Size([32, 128]), 'labels': torch.Size([32])}
--- Valid Loader 2 ---
{'input_ids': torch.Size([32, 128]), 'attention_mask': torch.Size([32, 128]), 'labels': torch.Size([32])}
--- Valid Loader 3 ---
{'input_ids': torch.Size([32, 128]), 'attention_mask': torch.Size([32, 128]), 'labels': torch.Size([32])}
--- Valid Loader 4 ---
{'input_ids': torch.Size([32, 128]), 'attention_mask': torch.Size([32, 128]), 'labels': torch.Size([32])}
--- Valid Loader 5 ---
{'input_ids': torch.Size([32, 128]), 'attention_mask': torch.Size([32, 128]), 'labels': torch.Size([32])}
--- Valid Loader 6 ---
{'input_ids': torch.Size([32, 128]), 'attention_mask': torch.Size([32, 128]), 'labels': torch.Size([32])}
--- Valid Loader 7 ---
Inputs shape: torch.Size([32, 196, 770]), Labels shape: torch.Size([32])
--- Valid Loader 8 ---
Inputs shape: torch.Size([32, 196, 770]), Labels shape: torch.Size([32])
--- Valid Loader 9 ---
Inputs shap

In [9]:
# image, text 모델 확인
image_model_path = "/home/youlee/perceiver/perceiver/model/image_model_1.pkl"
text_model_path = "/home/youlee/perceiver/perceiver/model/text_model_1.pkl"

image_model = torch.load(image_model_path)
print("Image Model:")
print(image_model)

text_model = torch.load(text_model_path)
print("\nText Model:")
print(text_model)

Image Model:
Perceiver(
  (input_projection): Linear(in_features=770, out_features=128, bias=True)
  (blocks): ModuleList(
    (0-3): 4 x PerceiverBlock(
      (cross_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
      )
      (cross_ln): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (self_attn_layers): ModuleList(
        (0-9): 10 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
          )
          (linear1): Linear(in_features=128, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=128, bias=True)
          (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inpl

  image_model = torch.load(image_model_path)
  text_model = torch.load(text_model_path)


In [10]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

## Define ModelDiff

In [11]:
# Placeholder settings grouped under the "Define ModelDiff" heading.
# NOTE(review): none of these names is read by the cells visible in this notebook —
# presumably carried over from a training script; confirm before relying on any value.
num_classes = -1
lr = 0.1
batch_size = 32  # re-assigns the value already set in the path/config cell (also 32)
val_batch_size = 100
workers = 24
weight_decay = 4e-5
dataset_name = ''
train_path = ''
val_path = ''
cuda = True
seed = 1  # NOTE(review): differs from seed_everything(42) and is never passed to it
epochs = 160
restore_epoch = 0
save_folder = ''
load_folder = ''
one_shot_prune_perc = 0.5
mode = ''
logfile = ''
initial_from_task = ''

In [12]:
# Eighteen category names, three per row.
# NOTE(review): not referenced by the visible cells; presumably these are the class
# labels split into six groups of three for the text tasks — confirm.
DATASETS = [
    'Opinion','Art & Design','Television',
    'Music','Travel','Real Estate',
    'Books','Theater','Health',
    'Sports','Science','Food',
    'Fashion & Style','Movies','Technology',
    'Dance', 'Media', 'Style'
]

In [13]:
# NOTE(review): neither value is read by the visible cells; presumably leftovers from
# an input-perturbation step — confirm or remove.
epsilon = 0.1
max_iterations = 100

## 유사도검색

task_id 책정방식: \
0~5 : text modality \
6~11: image modality

## Approach 1: 특정 input data로 유사도 검증

In [14]:
# English summary of the note below: the image models end in Linear(128 -> 24) and the
# text models in Linear(64 -> 3), so their outputs (batch_size, num_classes) have
# different widths. The text num_classes comes from the label encoder (six groups of
# three classes), so a projection layer per modality maps both outputs to one shared
# dimension instead of changing the text model's output layer.
"""
(output_layer): Linear(in_features=128, out_features=24, bias=True)
이미지 모델 출력: (batch_size, 24)

(output_layer): Linear(in_features=64, out_features=3, bias=True)
텍스트 모델 출력: (batch_size, 3)

perceiver 모델에서 output은 (batch_size, num_classes)
이미지와 텍스트의 num_classes가 다르게 되어있음 

text baseline 코드에서 out_feature를 24로 수정하면 되지 않음?
-> 데이터셋을 class 3개인 6개 그룹으로 나눴고 num_classes = len(label_encoder.classes_)로 해뒀음..

=> 이미지랑 텍스트를 같은 차원으로 매핑하는 projection layer 넣음
"""

# Projection layers: one per modality, both mapping to projection_dim features.
# NOTE(review): these nn.Linear layers are used with their initial weights (no training
# is visible in this notebook), so the downstream distances depend on the initialization.
projection_dim = 24  # the best target_id shifts slightly with this value (it changed when 128 was tried)
image_projection = nn.Linear(24, projection_dim).to(DEVICE)
text_projection = nn.Linear(3, projection_dim).to(DEVICE)

In [15]:
# DDV computation: cosine distance
def _paired_cosine_distances(outputs, n_pairs):
    """Cosine distance between row ``i`` and row ``i + n_pairs`` for ``i < n_pairs``."""
    return np.array([
        spatial.distance.cosine(outputs[i], outputs[i + n_pairs])
        for i in range(n_pairs)
    ])


def compute_ddv_cos(
    model1, model2,
    inputs1, inputs2,
    proj1, proj2,
    is_model1_text=False,  # whether model1 is a text model
    is_model2_text=False,  # whether model2 is a text model
    device='cuda'
):
    """Build one cosine-distance DDV per model from its projected outputs.

    Each model is run on its own input batch under ``torch.no_grad()``, the output
    is passed through the matching projection, and row ``i`` is paired with row
    ``i + n_pairs``; the cosine distance of every pair forms the DDV.

    Args:
        model1, model2: Callables applied to ``inputs1`` / ``inputs2``.
        inputs1, inputs2: Input batches. A batch is cast with ``.float()`` before
            the call unless the corresponding ``is_model*_text`` flag is set.
        proj1, proj2: Callables applied to the model outputs; their result must
            support ``.cpu().numpy()``.
        is_model1_text, is_model2_text: Skip the ``.float()`` cast when True.
        device: Unused; kept so existing callers that pass it keep working.

    Returns:
        ``(ddv1, ddv2)``: two 1-D NumPy arrays of equal length ``n_pairs``.
    """
    with torch.no_grad():
        # model1: text inputs are passed through unchanged, image inputs as float.
        raw1 = model1(inputs1) if is_model1_text else model1(inputs1.float())
        output1 = proj1(raw1).cpu().numpy()

        # model2: same treatment.
        raw2 = model2(inputs2) if is_model2_text else model2(inputs2.float())
        output2 = proj2(raw2).cpu().numpy()

    # Use the pair count both batches can supply. Deriving it from output1 alone
    # raised IndexError whenever the second batch was smaller, and the two DDVs
    # must have the same length to be compared afterwards.
    n_pairs = min(output1.shape[0], output2.shape[0]) // 2

    dists1 = _paired_cosine_distances(output1, n_pairs)
    dists2 = _paired_cosine_distances(output2, n_pairs)
    return dists1, dists2

In [16]:
# DDV computation: Euclidean distance
def _paired_euclidean_distances(outputs, n_pairs):
    """Euclidean distance between row ``i`` and row ``i + n_pairs`` for ``i < n_pairs``."""
    return np.array([
        spatial.distance.euclidean(outputs[i], outputs[i + n_pairs])
        for i in range(n_pairs)
    ])


def compute_ddv_euc(
    model1, model2,
    inputs1, inputs2,
    proj1, proj2,
    is_model1_text=False,
    is_model2_text=False,
    device='cuda'
):
    """Build one Euclidean-distance DDV per model from its projected outputs.

    Each model is run on its own input batch under ``torch.no_grad()``, the output
    is passed through the matching projection, and row ``i`` is paired with row
    ``i + n_pairs``; the Euclidean distance of every pair forms the DDV.

    Args:
        model1, model2: Callables applied to ``inputs1`` / ``inputs2``.
        inputs1, inputs2: Input batches. A batch is cast with ``.float()`` before
            the call unless the corresponding ``is_model*_text`` flag is set.
        proj1, proj2: Callables applied to the model outputs; their result must
            support ``.cpu().numpy()``.
        is_model1_text, is_model2_text: Skip the ``.float()`` cast when True.
        device: Unused; kept so existing callers that pass it keep working.

    Returns:
        ``(ddv1, ddv2)``: two 1-D NumPy arrays of equal length ``n_pairs``.
    """
    with torch.no_grad():
        # model1: text inputs are passed through unchanged, image inputs as float.
        raw1 = model1(inputs1) if is_model1_text else model1(inputs1.float())
        output1 = proj1(raw1).cpu().numpy()

        # model2: same treatment.
        raw2 = model2(inputs2) if is_model2_text else model2(inputs2.float())
        output2 = proj2(raw2).cpu().numpy()

    # Use the pair count both batches can supply. Deriving it from output1 alone
    # raised IndexError whenever the second batch was smaller, and the two DDVs
    # must have the same length to be compared afterwards.
    n_pairs = min(output1.shape[0], output2.shape[0]) // 2

    dists1 = _paired_euclidean_distances(output1, n_pairs)
    dists2 = _paired_euclidean_distances(output2, n_pairs)
    return dists1, dists2

In [17]:
# Cosine-distance helper
def compute_sim_cos(ddv1, ddv2):
    """Return the cosine distance between two DDVs.

    Despite the ``sim`` in the name, the value is a distance as computed by
    ``scipy.spatial.distance.cosine``: smaller means the vectors are more alike.
    """
    cosine_distance = spatial.distance.cosine(ddv1, ddv2)
    return cosine_distance

### modelDiff 

In [18]:
ddvcc_list_all = []  # (target_id, task_id, cosine distance between the cosine-based DDVs)
ddvec_list_all = []  # (target_id, task_id, cosine distance between the Euclidean-based DDVs)


def _first_batch_inputs(model_id):
    """Return the model inputs of the first validation batch of ``model_id``, on DEVICE."""
    first_batch = next(iter(valid_loaders[model_id]))
    if model_id < 6:
        # Text loaders yield dict batches; only input_ids is fed to the model.
        return first_batch['input_ids'].to(DEVICE).long()
    # Image loaders yield sequence batches with the inputs at index 0.
    return first_batch[0].to(DEVICE)


# Fetch every model's reference batch once. The loop used to rebuild a loader
# iterator for both models on every inner iteration (2 x 132 fetches); the target
# batch in particular was invariant across the inner loop.
reference_inputs = [_first_batch_inputs(model_id) for model_id in range(12)]

# target_id vs. task_id
for target_id in range(12):
    print(f"\n================= TARGET MODEL: {target_id} =================")

    is_target_text = (target_id < 6)  # ids 0-5 are text models, 6-11 image models
    inputs_target = reference_inputs[target_id]

    for task_id in range(12):
        if task_id == target_id:  # skip comparing a model with itself
            continue

        is_task_text = (task_id < 6)
        inputs_task = reference_inputs[task_id]

        # Both DDV flavours take exactly the same arguments.
        ddv_kwargs = dict(
            model1=input_models[target_id],
            model2=input_models[task_id],
            inputs1=inputs_target,
            inputs2=inputs_task,
            proj1=(text_projection if is_target_text else image_projection),
            proj2=(text_projection if is_task_text else image_projection),
            is_model1_text=is_target_text,
            is_model2_text=is_task_text,
            device=DEVICE
        )

        # Cosine-based DDVs, compared with cosine distance.
        ddv1, ddv2 = compute_ddv_cos(**ddv_kwargs)
        ddv_distance_cos = compute_sim_cos(ddv1, ddv2)

        # Euclidean-based DDVs, also compared with cosine distance ("euc-cos").
        ddv1, ddv2 = compute_ddv_euc(**ddv_kwargs)
        ddv_distance_euc = compute_sim_cos(ddv1, ddv2)

        # Report the pair.
        print(f"[task_id {task_id} => target_id {target_id}] "
              f"DDV cos-cos: {ddv_distance_cos:.5f}, "
              f"DDV euc-cos: {ddv_distance_euc:.5f}")

        # Record the pair for the best-target search.
        ddvcc_list_all.append((target_id, task_id, ddv_distance_cos))
        ddvec_list_all.append((target_id, task_id, ddv_distance_euc))


[task_id 1 => target_id 0] DDV cos-cos: 0.24807, DDV euc-cos: 0.15501
[task_id 2 => target_id 0] DDV cos-cos: 0.25558, DDV euc-cos: 0.16784
[task_id 3 => target_id 0] DDV cos-cos: 0.26441, DDV euc-cos: 0.23380
[task_id 4 => target_id 0] DDV cos-cos: 0.26474, DDV euc-cos: 0.22376
[task_id 5 => target_id 0] DDV cos-cos: 0.34324, DDV euc-cos: 0.23314
[task_id 6 => target_id 0] DDV cos-cos: 0.26272, DDV euc-cos: 0.17879
[task_id 7 => target_id 0] DDV cos-cos: 0.29565, DDV euc-cos: 0.16340
[task_id 8 => target_id 0] DDV cos-cos: 0.21350, DDV euc-cos: 0.13613
[task_id 9 => target_id 0] DDV cos-cos: 0.31290, DDV euc-cos: 0.19864
[task_id 10 => target_id 0] DDV cos-cos: 0.44360, DDV euc-cos: 0.29714
[task_id 11 => target_id 0] DDV cos-cos: 0.65315, DDV euc-cos: 0.42663

[task_id 0 => target_id 1] DDV cos-cos: 0.24807, DDV euc-cos: 0.15501
[task_id 2 => target_id 1] DDV cos-cos: 0.34595, DDV euc-cos: 0.26451
[task_id 3 => target_id 1] DDV cos-cos: 0.26152, DDV euc-cos: 0.19815
[task_id 4 => ta

## task_id별 best target_id 찾기

In [19]:
from collections import defaultdict


def _group_by_task(records):
    """Group (target_id, task_id, distance) records into {task_id: [(target_id, distance), ...]}."""
    grouped = defaultdict(list)
    for target, task, distance in records:
        grouped[task].append((target, distance))
    return grouped


def _closest_target_per_task(grouped):
    """Map each task_id to the (target_id, distance) pair with the smallest distance.

    The comparison is strict, so the first of several equal minima wins; a task
    with no distance below infinity maps to (None, inf).
    """
    closest = {}
    for task, candidates in grouped.items():
        winner, smallest = None, float("inf")
        for target, distance in candidates:
            if distance < smallest:
                winner, smallest = target, distance
        closest[task] = (winner, smallest)
    return closest


# Ranking by the cosine-based DDV distance
data_by_task_cos = _group_by_task(ddvcc_list_all)
best_target_for_task_cos = _closest_target_per_task(data_by_task_cos)

print("\n===== Best target_id by cos-distance for each task_id =====")
for s_id, (t_id, dist) in sorted(best_target_for_task_cos.items()):
    print(f"- task_id {s_id}: best target_id = {t_id}, cos_distance = {dist:.5f}")


# Ranking by the Euclidean-based DDV distance
data_by_task_euc = _group_by_task(ddvec_list_all)
best_target_for_task_euc = _closest_target_per_task(data_by_task_euc)

print("\n===== Best target_id by euc-distance for each task_id =====")
for s_id, (t_id, dist) in sorted(best_target_for_task_euc.items()):
    print(f"- task_id {s_id}: best target_id = {t_id}, euc_distance = {dist:.5f}")


===== Best target_id by cos-distance for each task_id =====
- task_id 0: best target_id = 8, cos_distance = 0.21350
- task_id 1: best target_id = 5, cos_distance = 0.14462
- task_id 2: best target_id = 0, cos_distance = 0.25558
- task_id 3: best target_id = 9, cos_distance = 0.20899
- task_id 4: best target_id = 8, cos_distance = 0.25263
- task_id 5: best target_id = 1, cos_distance = 0.14462
- task_id 6: best target_id = 3, cos_distance = 0.24985
- task_id 7: best target_id = 1, cos_distance = 0.24088
- task_id 8: best target_id = 0, cos_distance = 0.21350
- task_id 9: best target_id = 3, cos_distance = 0.20899
- task_id 10: best target_id = 5, cos_distance = 0.21853
- task_id 11: best target_id = 1, cos_distance = 0.37387

===== Best target_id by euc-distance for each task_id =====
- task_id 0: best target_id = 8, euc_distance = 0.13613
- task_id 1: best target_id = 5, euc_distance = 0.13168
- task_id 2: best target_id = 7, euc_distance = 0.13323
- task_id 3: best target_id = 9, euc