In [2]:
import os
import sys
sys.path.append("../bottlenecks")
import configs
from cbm import *
from data_utils import *
from trainer_utils import *
from graph_plot_tools import *
from utils import *
from metric_utils import *
from peft import LoraConfig, get_peft_model
from typing import List, Dict, Optional
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"

2024-03-24 18:03:23.661726: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-24 18:03:23.661811: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-24 18:03:23.661842: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-24 18:03:23.670663: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
configs.set_seed(42)
device = configs.set_device(5)

There are 8 GPU(s) available.
We will use the GPU: NVIDIA A100-PCIE-40GB


In [4]:
torch.cuda.get_device_name(device)

'NVIDIA A100-PCIE-40GB'

In [5]:
plt.style.use("seaborn-v0_8")

**CUB**

In [None]:
with open("../data/cub_filtered.txt", "r") as f:
    concepts = f.read().split('\n')

In [6]:
with open("../data/cub_classes.txt", "r") as f:
    class_names = f.read().split('\n')

In [7]:
len(class_names)

200

In [8]:
class_names[:10]

['Black footed Albatross',
 'Laysan Albatross',
 'Sooty Albatross',
 'Groove billed Ani',
 'Crested Auklet',
 'Least Auklet',
 'Parakeet Auklet',
 'Rhinoceros Auklet',
 'Brewer Blackbird',
 'Red winged Blackbird']

In [10]:
class_names = [c.lower() for c in class_names]

In [12]:
train_loader_preprocessed  = prepared_dataloaders(Constants.cub200_link,
                                                  concepts=class_names,
                                                  prep_loaders="train",
                                                  batch_size=128,
                                                  backbone_name=Constants.clip_large_link,
                                                 )

  0%|          | 0/74 [00:00<?, ?it/s]

In [4]:
def accuracy(output, target, topk=(1,)):
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred)).contiguous()
    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

In [7]:
@torch.no_grad()
def zeroshoting_backbone_model(model, loader, precision_metric, recall_metric, f1_metric):
    top_1_accs, top_5_accs = [], []
    top_1_precisions, top_1_recalls = [], []
    top_1_f1scores = []
    model.to(device)
    with torch.no_grad():
        for step, batch in tqdm(enumerate(loader, 0)):
            warnings.filterwarnings("ignore")
            inputs, labels = batch
            inputs, targets = inputs.to(device), torch.LongTensor(labels).to(
                device
            )
            logits_per_image = model(**inputs).logits_per_image
            top_1, top_5 = accuracy(logits_per_image, targets, topk=(1, 5))
            top_1_accs.append(top_1.detach().cpu().item())
            top_5_accs.append(top_5.detach().cpu().item())

            precs = precision_metric.compute(
                    predictions=logits_per_image.argmax(dim=-1).cpu(),
                    references=targets.cpu(),
                    average="weighted",
                )
            recs = recall_metric.compute(
                    predictions=logits_per_image.argmax(dim=-1).cpu(),
                    references=targets.cpu(),
                    average="weighted",
                )
            f1 = f1_metric.compute(
                    predictions=logits_per_image.argmax(dim=-1).cpu(),
                    references=targets.cpu(),
                    average="weighted",
                    labels=np.unique(logits_per_image.argmax(dim=-1).cpu()),
                )
            
            top_1_precisions.append(precs["precision"])
            top_1_recalls.append(recs["recall"])
            top_1_f1scores.append(f1["f1"])
            
    return (
        np.mean(top_1_accs),
        np.mean(top_5_accs),
        np.mean(top_1_precisions),
        np.mean(top_1_recalls),
        np.mean(top_1_f1scores),
    )

In [8]:
import evaluate
model = transformers.CLIPModel.from_pretrained(Constants.clip_large_link)
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")
f1_metric = evaluate.load("f1")

In [9]:
print_trainable_parameters(model)

trainable params: 427,616,513 || all params: 427,616,513 || trainable%: 100.00


In [21]:
model

CLIPModel(
  (text_model): CLIPTextTransformer(
    (embeddings): CLIPTextEmbeddings(
      (token_embedding): Embedding(49408, 768)
      (position_embedding): Embedding(77, 768)
    )
    (encoder): CLIPEncoder(
      (layers): ModuleList(
        (0-11): 12 x CLIPEncoderLayer(
          (self_attn): CLIPAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (layer_norm1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (mlp): CLIPMLP(
            (activation_fn): QuickGELUActivation()
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
          )
          (layer_norm2): LayerNorm((768,), eps=1e-05,

In [None]:
top_1_accs, top_5_accs, \
top_1_precisions, top_1_recalls, \
top_1_f1scores = zeroshoting_backbone_model(model, 
                                        train_loader_preprocessed,
                                        precision_metric,
                                        recall_metric,
                                        f1_metric,
                                        )

In [24]:
top_1_accs

62.63503688090557

In [25]:
top_5_accs

90.32546347540779

In [26]:
top_1_precisions

0.6396276499973812

In [27]:
top_1_recalls

0.6263503692646135

In [28]:
top_1_f1scores

0.8121238334459757

#### cifar10

In [10]:
with open("../data/cifar10_classes.txt", "r") as f:
    class_names = f.read().split('\n')

In [14]:
train_loader_preprocessed  = prepared_dataloaders(Constants.cifar10_link,
                                                  concepts=class_names,
                                                  prep_loaders="train",
                                                  batch_size=32,
                                                  backbone_name=Constants.clip_large_link,
                                                 )

  0%|          | 0/1500 [00:00<?, ?it/s]

In [None]:
top_1_accs, top_5_accs, \
top_1_precisions, top_1_recalls, \
top_1_f1scores = zeroshoting_backbone_model(model, 
                                        train_loader_preprocessed,
                                        precision_metric,
                                        recall_metric,
                                        f1_metric,
                                        )

In [17]:
print("Top 1 Accuracy: ", top_1_accs, "\n")
print("Top 5 Accuracy: ", top_5_accs, "\n")
print("Top 1 Precision: ", top_1_precisions, "\n")
print("Top 1 Recall: ", top_1_recalls, "\n")
print("F1: ", top_1_f1scores, "\n")

Top 1 Accuracy:  81.79166666666667 

Top 5 Accuracy:  97.51041666666667 

Top 1 Precision:  0.8665994243025493 

Top 1 Recall:  0.8179166666666666 

F1:  0.8257207012040896 



#### cifar100

In [19]:
with open("../data/cifar100_classes.txt", "r") as f:
    class_names = f.read().split('\n')

In [23]:
train_loader_preprocessed  = prepared_dataloaders(Constants.cifar100_link,
                                                  concepts=class_names,
                                                  prep_loaders="train",
                                                  batch_size=32,
                                                  backbone_name=Constants.clip_large_link,
                                                 )

Downloading readme:   0%|          | 0.00/641 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/27.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/60000 [00:00<?, ? examples/s]

  0%|          | 0/1500 [00:00<?, ?it/s]

In [None]:
top_1_accs, top_5_accs, \
top_1_precisions, top_1_recalls, \
top_1_f1scores = zeroshoting_backbone_model(model, 
                                        train_loader_preprocessed,
                                        precision_metric,
                                        recall_metric,
                                        f1_metric,
                                        )

In [25]:
print("Top 1 Accuracy: ", top_1_accs, "\n")
print("Top 5 Accuracy: ", top_5_accs, "\n")
print("Top 1 Precision: ", top_1_precisions, "\n")
print("Top 1 Recall: ", top_1_recalls, "\n")
print("F1: ", top_1_f1scores, "\n")

Top 1 Accuracy:  52.84166666666667 

Top 5 Accuracy:  76.825 

Top 1 Precision:  0.5473391293891294 

Top 1 Recall:  0.5284166666666666 

F1:  0.8452402787050571 

