In [6]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
import torch
import torch
import sys
import os
print(os.getcwd())
module_path = os.path.abspath(os.path.join('/data2/david3684/2024_arithmetic'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
import numpy as np
from src.eval import eval_single_dataset_with_prediction, eval_single_dataset
from src.main import save_scale_factors
from src.args import parse_arguments
from src.datasets.common import get_dataloader, maybe_dictionarize
from src.datasets.registry import get_dataset
from src.modeling import ImageEncoder, ImageClassifier
from src.task_vectors import TaskVector
from tqdm import tqdm
from copy import deepcopy
import matplotlib.pyplot as plt
from datetime import datetime
import pickle
import open_clip

import csv
import ray
from easydict import EasyDict
from datetime import datetime

/data2/david3684/2024_arithmetic/src


In [8]:
# Cars, DTD, EuroSAT, GTSRB, MNIST, RESISC45, SUN397, SVHN
class Args:
    def __init__(self):
        self.model = 'ViT-L-14'
        self.tasks = ["Cars", "DTD", "EuroSAT", "GTSRB", "MNIST", "RESISC45", "SUN397", "SVHN"] 
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.task_scale_factors = None
        self.save = '/data2/david3684/2024_arithmetic/checkpoints/ViT-L-14'
        self.data_location = '/data1/common_datasets/vision_cls/'
        self.eval_datasets = None
        self.train_dataset = None
        self.exp_name = None
        self.results_db = None
        self.batch_size = 256
        self.lr = 0.001
        self.wd = 0.1
        self.ls = 0.0
        self.warmup_length = 500
        self.epochs = 10
        self.load = None
        self.cache_dir = None
        self.openclip_cachedir = '/data2/david3684/.cache/open_clip'
        self.initial_rank_ratio = 1.0
        self.low_rank_mode = 'SoRA'
        self.pretrained_model = 'openai'
        self.scale_shared_weight = False
        self.num_test_samples = 2048
        self.no_shared_weight = False

args = Args()

In [9]:
# Load finetuned weight
["Cars", "DTD", "EuroSAT", "GTSRB", "MNIST", "RESISC45", "SUN397", "SVHN"]

model_list = {}
for idx, name in enumerate(args.tasks):
    model = torch.load(f'/data1/common_datasets/shared_weight/task_vector/ViT-L-14/{name}/finetuned.pt').to(args.device)
    model_list[name] = model

# model_1 = torch.load('/data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/DTD/finetuned.pt').to(args.device)
# model_2 = torch.load('/data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/SUN397/finetuned.pt').to(args.device)


In [10]:
def transform_key(old_key):
    if old_key.startswith('shared.attn.layer') or old_key.startswith('clip_vit'):
        parts = old_key.split('.')
        layer_idx = parts[3]
        # print(layer_idx)
        sub_key = parts[4]
        if sub_key in ['q', 'k', 'v']:
            return f'model.visual.transformer.resblocks.{layer_idx}.attn.{sub_key}_weight'
        elif sub_key == 'out_proj':
            return f'model.visual.transformer.resblocks.{layer_idx}.attn.out_proj.weight'
        elif sub_key == 'c_fc' or sub_key == 'c_proj':
            return f'model.visual.transformer.resblocks.{layer_idx}.mlp.{sub_key}.weight'
    return old_key

In [11]:
def format_shared_weight(shared_weight_state_dict, open_clip_state_dict_template):
    qkv_store = {}
    for old_key, value in shared_weight_state_dict.items():
        if 'diff' in old_key or 'scale_dict' in old_key:
            continue

        new_key = transform_key(old_key)
        layer_idx = new_key.split('.')[4]

        if layer_idx not in qkv_store:
            qkv_store[layer_idx] = {'q': None, 'k': None, 'v': None}

        weight_type = new_key.split('.')[-1]
        # in_proj.weight (q, k, v)
        if weight_type in ['q_weight', 'k_weight', 'v_weight']:
            qkv_store[layer_idx][weight_type[0]] = value
        else:  # out_proj.weight, c_fc.weight, c_proj.weight
            assert new_key in open_clip_state_dict_template
            open_clip_state_dict_template[new_key] = value

    for layer_idx, qkv in qkv_store.items():
        if all(v.bool().all().item() for v in qkv.values()):
            in_proj_weight = torch.cat([qkv['q'], qkv['k'], qkv['v']], dim=0)
            # concat qkv into 3072*1024 tensor
            new_key = f'model.visual.transformer.resblocks.{layer_idx}.attn.in_proj_weight'
            assert new_key in open_clip_state_dict_template
            open_clip_state_dict_template[new_key] = in_proj_weight
        else:
            print(
                f"Missing q, k, or v for layer {layer_idx}. q: {qkv['q']}, k: {qkv['k']}, v: {qkv['v']}")

    return open_clip_state_dict_template

#나머지 처리 필요

In [12]:
# format shaed weight into openclip state dict
shared_weight_state_dict = torch.load('/data1/common_datasets/shared_weight/20241025/vanilla_T8/rankmin_config_20241025_uni_T8_vanilla.bin')
zero_shot_encoder = ImageEncoder(args, keep_lang=False)

formatted_shared_weight = format_shared_weight(shared_weight_state_dict, zero_shot_encoder.state_dict())
print(zero_shot_encoder.load_state_dict(formatted_shared_weight))
zero_shot_encoder = zero_shot_encoder.to(args.device)

Loading ViT-L-14 pre-trained weights.
<All keys matched successfully>


In [7]:
low_rank_task_vectors = {}
for task in args.tasks:
    model = model_list[task]
    finetuned_state_dict = model.state_dict()
    low_rank_task_vectors[task] = TaskVector(args, zero_shot_encoder.state_dict(), finetuned_state_dict, task).to(args.device)

low_rank_task_vector_sum = sum(low_rank_task_vectors.values())  
low_rank_single_task_encoder = low_rank_task_vectors[task].apply_to(deepcopy(zero_shot_encoder), scaling_coef=1.0)

eval_single_dataset(low_rank_single_task_encoder, "Cars", deepcopy(args))

Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight


In [10]:
# ray runner
ray.shutdown()
ray.init()

def eval_single_task_wrapper(is_single, encoder_state_dict, task, args, scale_coef):
    
    encoder = ImageEncoder(args, keep_lang=False)
    encoder.load_state_dict(encoder_state_dict)
    result = eval_single_dataset(encoder, task, args)

    if is_single:
        print(f"Running single task evaluation for {task}: {result}")
    else:
        print(f"Running multitask task evaluation for {task}: {result}")

    ret = {
        "is_single": is_single,
        "task": task,
        "top1": result["top1"],
        "scaling_coef": 1.0,
        "initial_rank_ratio_list": args.initial_rank_ratio,
    }
    if not is_single:
        ret["scaling_coef"] = scale_coef

    return ret

eval_ray_runner = ray.remote(eval_single_task_wrapper).options(num_gpus=0.5, num_cpus=4)
ray_pack = []

experiment_vector = EasyDict()
experiment_vector.initial_rank_ratio_list = [0.0, 0.001, 0.005, 0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.5, 0.64, 1.0]
experiment_vector.scaling_coef_list = [0.2, 0.5, 1.0]

for initial_rank_ratio in experiment_vector.initial_rank_ratio_list:
    print(f'Initial rank ratio: {initial_rank_ratio}')
    args.initial_rank_ratio = initial_rank_ratio
    low_rank_task_vectors = {}
    # Build low rank task vectors
    for task in args.tasks:
        model = model_list[task]
        finetuned_state_dict = model.state_dict()
        low_rank_task_vectors[task] = TaskVector(args, zero_shot_encoder.state_dict(), finetuned_state_dict, task).to(args.device)
    
    low_rank_task_vector_sum = sum(low_rank_task_vectors.values())  
    
    for task in args.tasks:
        for each_scale_factor in experiment_vector.scaling_coef_list: 
            # print(f'Evaluate {task}')
            # Evaluate sinlge task model
            low_rank_single_task_encoder = low_rank_task_vectors[task].apply_to(deepcopy(zero_shot_encoder), scaling_coef=1.0)
            # single_task_reuslt = eval_single_dataset(low_rank_single_task_encoder, task, deepcopy(args))
            ray_pack.append(eval_ray_runner.remote(True, low_rank_single_task_encoder.state_dict(), task, deepcopy(args), each_scale_factor))
            print(f'Single task model for {task} fetched')
            
            # Evaluate multi task model
            # print('Multi task model')
            low_rank_multi_task_encoder = low_rank_task_vector_sum.apply_to(deepcopy(zero_shot_encoder), scaling_coef=each_scale_factor)
            # multi_task_result = eval_single_dataset(low_rank_multi_task_encoder, task, deepcopy(args))
            ray_pack.append(eval_ray_runner.remote(False, low_rank_multi_task_encoder.state_dict(), task, deepcopy(args), each_scale_factor))
            print(f'Multi task model for {task} fetched')

results = ray.get(ray_pack)

current_time = datetime.now().strftime("%Y%m%d-%H%M%S")
csv_file_path = f"./eval_results_{current_time}.csv"
with open(csv_file_path, mode="w", newline="") as csv_file:
    fieldnames = ["is_single", "task", "top1", "initial_rank_ratio_list", "scaling_coef"]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    for result in results:
        writer.writerow(result)

ray.shutdown()


2024-10-24 20:40:24,362	INFO worker.py:1786 -- Started a local Ray instance.


Building task vector with shared weight
Single task model for Cars fetched
Multi task model for Cars fetched
Single task model for Cars fetched
Multi task model for Cars fetched
Single task model for Cars fetched
Multi task model for Cars fetched
Building task vector with shared weight


2024-10-24 20:41:21,797	ERROR worker.py:409 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::eval_single_task_wrapper()[39m (pid=2523786, ip=172.17.0.9)
RuntimeError: The remote function failed to import on the worker. This may be because needed library dependencies are not installed in the worker environment or cannot be found from sys.path ['/data2/david3684/2024_arithmetic/src', '/home/david3684/.local/lib/python3.10/site-packages/ray/thirdparty_files', '/home/david3684/.local/lib/python3.10/site-packages/ray/_private/workers', '/data2/david3684/2024_arithmetic/src', '/data2/david3684/Diffusion-Assignment3-ControlNet-LoRA/task_2_lora', '/opt/conda/lib/python310.zip', '/opt/conda/lib/python3.10', '/opt/conda/lib/python3.10/lib-dynload', '/home/david3684/.local/lib/python3.10/site-packages', '/opt/conda/lib/python3.10/site-packages']:

[36mray::eval_single_task_wrapper()[39m (pid=2523786, ip=172.17.0.9)
ModuleNotFoundError: No module named 'src'
2024-10

Single task model for Cars fetched


2024-10-24 20:41:30,811	ERROR worker.py:409 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::eval_single_task_wrapper()[39m (pid=2523783, ip=172.17.0.9)
RuntimeError: The remote function failed to import on the worker. This may be because needed library dependencies are not installed in the worker environment or cannot be found from sys.path ['/data2/david3684/2024_arithmetic/src', '/home/david3684/.local/lib/python3.10/site-packages/ray/thirdparty_files', '/home/david3684/.local/lib/python3.10/site-packages/ray/_private/workers', '/data2/david3684/2024_arithmetic/src', '/data2/david3684/Diffusion-Assignment3-ControlNet-LoRA/task_2_lora', '/opt/conda/lib/python310.zip', '/opt/conda/lib/python3.10', '/opt/conda/lib/python3.10/lib-dynload', '/home/david3684/.local/lib/python3.10/site-packages', '/opt/conda/lib/python3.10/site-packages']:

[36mray::eval_single_task_wrapper()[39m (pid=2523783, ip=172.17.0.9)
ModuleNotFoundError: No module named 'src'


Multi task model for Cars fetched


2024-10-24 20:41:34,924	ERROR worker.py:409 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::eval_single_task_wrapper()[39m (pid=2523771, ip=172.17.0.9)
RuntimeError: The remote function failed to import on the worker. This may be because needed library dependencies are not installed in the worker environment or cannot be found from sys.path ['/data2/david3684/2024_arithmetic/src', '/home/david3684/.local/lib/python3.10/site-packages/ray/thirdparty_files', '/home/david3684/.local/lib/python3.10/site-packages/ray/_private/workers', '/data2/david3684/2024_arithmetic/src', '/data2/david3684/Diffusion-Assignment3-ControlNet-LoRA/task_2_lora', '/opt/conda/lib/python310.zip', '/opt/conda/lib/python3.10', '/opt/conda/lib/python3.10/lib-dynload', '/home/david3684/.local/lib/python3.10/site-packages', '/opt/conda/lib/python3.10/site-packages']:

[36mray::eval_single_task_wrapper()[39m (pid=2523771, ip=172.17.0.9)
ModuleNotFoundError: No module named 'src'


Single task model for Cars fetched
Multi task model for Cars fetched


2024-10-24 20:41:37,819	ERROR worker.py:409 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::eval_single_task_wrapper()[39m (pid=2523771, ip=172.17.0.9)
RuntimeError: The remote function failed to import on the worker. This may be because needed library dependencies are not installed in the worker environment or cannot be found from sys.path ['/data2/david3684/2024_arithmetic/src', '/home/david3684/.local/lib/python3.10/site-packages/ray/thirdparty_files', '/home/david3684/.local/lib/python3.10/site-packages/ray/_private/workers', '/data2/david3684/2024_arithmetic/src', '/data2/david3684/Diffusion-Assignment3-ControlNet-LoRA/task_2_lora', '/opt/conda/lib/python310.zip', '/opt/conda/lib/python3.10', '/opt/conda/lib/python3.10/lib-dynload', '/home/david3684/.local/lib/python3.10/site-packages', '/opt/conda/lib/python3.10/site-packages']:

[36mray::eval_single_task_wrapper()[39m (pid=2523771, ip=172.17.0.9)
ModuleNotFoundError: No module named 'src'


Single task model for Cars fetched


2024-10-24 20:41:39,812	ERROR worker.py:409 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::eval_single_task_wrapper()[39m (pid=2523771, ip=172.17.0.9)
RuntimeError: The remote function failed to import on the worker. This may be because needed library dependencies are not installed in the worker environment or cannot be found from sys.path ['/data2/david3684/2024_arithmetic/src', '/home/david3684/.local/lib/python3.10/site-packages/ray/thirdparty_files', '/home/david3684/.local/lib/python3.10/site-packages/ray/_private/workers', '/data2/david3684/2024_arithmetic/src', '/data2/david3684/Diffusion-Assignment3-ControlNet-LoRA/task_2_lora', '/opt/conda/lib/python310.zip', '/opt/conda/lib/python3.10', '/opt/conda/lib/python3.10/lib-dynload', '/home/david3684/.local/lib/python3.10/site-packages', '/opt/conda/lib/python3.10/site-packages']:

[36mray::eval_single_task_wrapper()[39m (pid=2523771, ip=172.17.0.9)
ModuleNotFoundError: No module named 'src'


Multi task model for Cars fetched
Building task vector with shared weight


2024-10-24 20:41:41,805	ERROR worker.py:409 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::eval_single_task_wrapper()[39m (pid=2523771, ip=172.17.0.9)
RuntimeError: The remote function failed to import on the worker. This may be because needed library dependencies are not installed in the worker environment or cannot be found from sys.path ['/data2/david3684/2024_arithmetic/src', '/home/david3684/.local/lib/python3.10/site-packages/ray/thirdparty_files', '/home/david3684/.local/lib/python3.10/site-packages/ray/_private/workers', '/data2/david3684/2024_arithmetic/src', '/data2/david3684/Diffusion-Assignment3-ControlNet-LoRA/task_2_lora', '/opt/conda/lib/python310.zip', '/opt/conda/lib/python3.10', '/opt/conda/lib/python3.10/lib-dynload', '/home/david3684/.local/lib/python3.10/site-packages', '/opt/conda/lib/python3.10/site-packages']:

[36mray::eval_single_task_wrapper()[39m (pid=2523771, ip=172.17.0.9)
ModuleNotFoundError: No module named 'src'
2024-10

KeyboardInterrupt: 

In [14]:
# loop for task vector rank

experiment_vector = EasyDict()
experiment_vector.initial_rank_ratio_list = [1.0]
experiment_vector.scaling_coef_list = [1.0, 0.3]

for initial_rank_ratio in experiment_vector.initial_rank_ratio_list:
    args.initial_rank_ratio = initial_rank_ratio
    low_rank_task_vectors = {}
    
    # Build low rank task vectors
    for task in args.tasks:
        model = model_list[task]
        finetuned_state_dict = model.state_dict()
        low_rank_task_vectors[task] = TaskVector(args, zero_shot_encoder.state_dict(), finetuned_state_dict, task).to(args.device)
    
    low_rank_task_vector_sum = sum(low_rank_task_vectors.values())  

    # 여기 ray로 parallelize 시켜버리고 값 wandb logging하게 만든 다음에
    # ray.get()으로 받아서 csv로 쓰게 만들어 버려야 겠다.
    


Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight
Building task vector with shared weight


In [16]:
for task in ["DTD", "SUN397", "Cars", "EuroSAT", "GTSRB", "MNIST", "RESISC45", "SVHN", ]:
    low_rank_single_task_encoder = low_rank_task_vectors[task].apply_to(deepcopy(zero_shot_encoder), scaling_coef=1.0)
    single_task_reuslt = eval_single_dataset(low_rank_single_task_encoder, task, deepcopy(args))
        
    # for each_scale_factor in experiment_vector.scaling_coef_list: 
    #     print(f'Evaluate {task}')
    #     # Evaluate sinlge task model
    #     print('Single task model')

    #     # Evaluate multi task model
    #     print('Multi task model')
    #     low_rank_multi_task_encoder = low_rank_task_vector_sum.apply_to(deepcopy(zero_shot_encoder), scaling_coef=each_scale_factor)
    #     multi_task_result = eval_single_dataset(low_rank_multi_task_encoder, task, deepcopy(args))

Did not find classification head for ViT-L-14 on EuroSAT at /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_EuroSAT_openai.pt, building one from scratch.
Loading ViT-L-14 pre-trained weights.
Building classification head.


100%|██████████| 10/10 [00:00<00:00, 33.64it/s]


Saving classification head to /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_EuroSAT_openai.pt


100%|██████████| 11/11 [00:43<00:00,  3.93s/it]


Done evaluating on EuroSAT. Accuracy: 99.93%
Classification head for ViT-L-14 on GTSRB exists at /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_GTSRB_openai.pt
Loading classification head from /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_GTSRB_openai.pt


100%|██████████| 50/50 [02:34<00:00,  3.10s/it]


Done evaluating on GTSRB. Accuracy: 99.24%
Classification head for ViT-L-14 on MNIST exists at /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_MNIST_openai.pt
Loading classification head from /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_MNIST_openai.pt


100%|██████████| 40/40 [02:03<00:00,  3.10s/it]


Done evaluating on MNIST. Accuracy: 99.69%
Classification head for ViT-L-14 on RESISC45 exists at /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_RESISC45_openai.pt
Loading classification head from /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_RESISC45_openai.pt


100%|██████████| 25/25 [01:25<00:00,  3.42s/it]


Done evaluating on RESISC45. Accuracy: 2.70%
Classification head for ViT-L-14 on SVHN exists at /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_SVHN_openai.pt
Loading classification head from /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_SVHN_openai.pt
Using downloaded and verified file: /data1/common_datasets/vision_cls/svhn/train_32x32.mat
Using downloaded and verified file: /data1/common_datasets/vision_cls/svhn/test_32x32.mat


100%|██████████| 102/102 [05:01<00:00,  2.95s/it]


Done evaluating on SVHN. Accuracy: 98.12%
Classification head for ViT-L-14 on DTD exists at /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_DTD_openai.pt
Loading classification head from /data2/david3684/2024_arithmetic/checkpoints/ViT-L-14/head_DTD_openai.pt
Number of classes: 47
Train dataset size: 1880
Test dataset size: 1880


 25%|██▌       | 2/8 [00:28<01:26, 14.36s/it]


KeyboardInterrupt: 

In [13]:
# _, _, val_preprocess = open_clip.create_model_and_transforms(
#             args.model, pretrained='openai', cache_dir=args.openclip_cachedir)
# dataset_1 = get_dataset(
#         args.tasks[0],
#         val_preprocess,
#         location=args.data_location,
#         batch_size=args.batch_size,
#         num_workers=16,
#         num_test_samples=None,
#     )
# dataloader_1 = get_dataloader(
#     dataset_1, is_train=False, args=args, image_encoder=None)

# dataset_2 = get_dataset(
#         args.tasks[1],
#         val_preprocess,
#         location=args.data_location,
#         batch_size=args.batch_size,
#         num_workers=16,
#         num_test_samples=args.num_test_samples,
#     )
# dataloader_2 = get_dataloader(
#     dataset_2, is_train=False, args=args, image_encoder=None)

Number of classes: 47
Train dataset size: 1880
Test dataset size: 1880
Downloading and loading the SUN397 dataset...
Number of classes: 397
Train dataset size: 87003
Test dataset size: 2048
