In [7]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
import sys
sys.path.append("../model/")
sys.path.append("../tools/")
from constants import *
from MLP_classifier import MultiClassClassifier
from dataset import FlickrAndPairs, TaskA, TaskAWithLabel, SimpleDataset
import torch
from torch.utils.data import DataLoader
import torch.nn as nn

In [19]:
train_data_dino = FlickrAndPairs(path="/data4/saland/data/flickr_and_pairs_DinoV2.pt",load_from_disk=True)
train_data_clip = FlickrAndPairs(path="/data4/saland/data/flickr_and_pairs.pt",load_from_disk=True)
test_data_dino  = TaskAWithLabel(path_to_csv="../../misc/scanFinal.csv",
                                 path_to_taskA="/data4/saland/data/taskA_dinoV2.pt")
test_data_clip  = TaskAWithLabel(path_to_csv="../../misc/scanFinal.csv",
                                 path_to_taskA="/data4/saland/data/taskA.pt")

100%|██████████| 10080/10080 [00:05<00:00, 1951.44it/s]
100%|██████████| 10080/10080 [00:04<00:00, 2093.75it/s]


### CLIP vs DINO

In [10]:
device = "cuda:0"
model_dino = MultiClassClassifier(n_features=DINO_FEATURE_DIM,n_classes=2).to(device)
model_clip = MultiClassClassifier(n_features=CLIP_FEATURE_DIM,n_classes=2).to(device)

model_clip.load_state_dict(torch.load(
    "../model/checkpoints/binary_train_real_fake_2k_fine_tune_meta_test.pt"))
model_dino.load_state_dict(torch.load(
    "../model/checkpoints/binary_train_real_fake_2k_fine_tune_meta_test.pt"))

model_clip.eval()
model_dino.eval()

lr = 1e-3
batch_size = 64
n_epochs = 1000

loss_fn = nn.CrossEntropyLoss()
optimizer_clip = torch.optim.SGD(model_clip.parameters(), lr=lr)
optimizer_dino = torch.optim.SGD(model_dino.parameters(), lr=lr)
model_dino.train()
model_clip.train()

rng = torch.Generator().manual_seed(SEED)

train_loader_dino = DataLoader(train_data_dino,batch_size=batch_size,shuffle=True,generator=rng)
train_loader_clip = DataLoader(train_data_clip,batch_size=batch_size,shuffle=True,generator=rng)

In [11]:
for epoch in range(1,n_epochs+1):
    for idx, batch in enumerate(train_loader_dino):
        # prediction and loss
        pred_dino = model_dino((batch["features"]).to(device))
        loss_dino = loss_fn(pred_dino,batch["label"].type(torch.LongTensor).to(device))


        # backpropagation
        
        loss_dino.backward()
        optimizer_dino.step()
        optimizer_dino.zero_grad()
        
    loss_dino, current = loss_dino.item(), idx*batch_size + len(batch["features"])
    if epoch%10 == 0 and epoch > 0:
        print(f"loss_dino: {loss_dino:>7f} [{epoch:>5d}/{n_epochs:>5d}]")

for epoch in range(1,n_epochs+1):
    for idx, batch in enumerate(train_loader_clip):
        
        pred_clip = model_clip(batch["features"].to(device))
        loss_clip = loss_fn(pred_clip,batch["label"].to(device))
        
        loss_clip.backward()
        optimizer_clip.step()
        optimizer_clip.zero_grad()
    
    loss_clip, current = loss_clip.item(), idx*batch_size + len(batch["features"])
    if epoch%10 == 0 and epoch > 0:
        print(f"loss_clip: {loss_clip:>7f} [{epoch:>5d}/{n_epochs:>5d}]")

loss_dino: 1.092763 [   10/ 1000]
loss_dino: 0.838894 [   20/ 1000]
loss_dino: 0.361515 [   30/ 1000]
loss_dino: 0.331113 [   40/ 1000]
loss_dino: 0.192977 [   50/ 1000]
loss_dino: 0.069331 [   60/ 1000]
loss_dino: 0.217783 [   70/ 1000]
loss_dino: 0.099533 [   80/ 1000]
loss_dino: 0.057134 [   90/ 1000]
loss_dino: 0.038878 [  100/ 1000]
loss_dino: 0.050931 [  110/ 1000]
loss_dino: 0.039315 [  120/ 1000]
loss_dino: 0.045511 [  130/ 1000]
loss_dino: 0.044440 [  140/ 1000]
loss_dino: 0.031986 [  150/ 1000]
loss_dino: 0.037977 [  160/ 1000]
loss_dino: 0.039411 [  170/ 1000]
loss_dino: 0.030647 [  180/ 1000]
loss_dino: 0.034220 [  190/ 1000]
loss_dino: 0.033869 [  200/ 1000]
loss_dino: 0.020640 [  210/ 1000]
loss_dino: 0.022107 [  220/ 1000]
loss_dino: 0.017908 [  230/ 1000]
loss_dino: 0.024085 [  240/ 1000]
loss_dino: 0.021152 [  250/ 1000]
loss_dino: 0.014152 [  260/ 1000]
loss_dino: 0.017818 [  270/ 1000]
loss_dino: 0.011804 [  280/ 1000]
loss_dino: 0.013579 [  290/ 1000]
loss_dino: 0.0

In [9]:
acc_clip = model_clip.get_model_accuracy_binary(test_data_clip.features,test_data_clip.label,binary_model=True,device=device)
acc_dino = model_dino.get_model_accuracy_binary(test_data_dino.features,test_data_dino.label,binary_model=True,device=device)
print("accuracy clip:",acc_clip)
print("accuracy dino:",acc_dino)

accuracy clip: 0.860185980796814
accuracy dino: 0.7680767774581909


### Concatenation of CLIP and DINOV2 features

In [20]:
train_clip_dino = SimpleDataset(features=torch.cat((train_data_clip.features,
                                                    train_data_dino.features),dim=1),
                                label=train_data_clip.label)

test_clip_dino = SimpleDataset(features=torch.cat((test_data_clip.features.to(device),
                                                   test_data_dino.features.to(device)),dim=1),
                                label= test_data_clip.label)

In [21]:
train_clip_dino.features.shape

torch.Size([3999, 1536])

In [22]:
device = "cuda:0"
model = MultiClassClassifier(n_features=CLIP_FEATURE_DIM+DINO_FEATURE_DIM,n_classes=2).to(device)
model.train()

lr = 1e-3
batch_size = 64
n_epochs = 1000

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

rng = rng = torch.Generator().manual_seed(SEED)
train_loader = DataLoader(train_clip_dino,batch_size=batch_size,shuffle=True,generator=rng)

for epoch in range(1,n_epochs+1):
    for idx, batch in enumerate(train_loader):
        
        pred = model(batch["features"].to(device))
        loss = loss_fn(pred,batch["label"].to(device))
        
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    
    loss, current = loss.item(), idx*batch_size + len(batch["features"])
    if epoch%10 == 0 and epoch > 0:
        print(f"loss_clip: {loss:>7f} [{epoch:>5d}/{n_epochs:>5d}]")

loss_clip: 0.366878 [   10/ 1000]
loss_clip: 0.214419 [   20/ 1000]
loss_clip: 0.161446 [   30/ 1000]
loss_clip: 0.221599 [   40/ 1000]
loss_clip: 0.096408 [   50/ 1000]
loss_clip: 0.037274 [   60/ 1000]
loss_clip: 0.096413 [   70/ 1000]
loss_clip: 0.044962 [   80/ 1000]
loss_clip: 0.045528 [   90/ 1000]
loss_clip: 0.022137 [  100/ 1000]
loss_clip: 0.053547 [  110/ 1000]
loss_clip: 0.032566 [  120/ 1000]
loss_clip: 0.034372 [  130/ 1000]
loss_clip: 0.047166 [  140/ 1000]
loss_clip: 0.026665 [  150/ 1000]
loss_clip: 0.036802 [  160/ 1000]
loss_clip: 0.043135 [  170/ 1000]
loss_clip: 0.014988 [  180/ 1000]
loss_clip: 0.031410 [  190/ 1000]
loss_clip: 0.027900 [  200/ 1000]
loss_clip: 0.010776 [  210/ 1000]
loss_clip: 0.018133 [  220/ 1000]
loss_clip: 0.011114 [  230/ 1000]
loss_clip: 0.020181 [  240/ 1000]
loss_clip: 0.011171 [  250/ 1000]
loss_clip: 0.009331 [  260/ 1000]
loss_clip: 0.019713 [  270/ 1000]
loss_clip: 0.006959 [  280/ 1000]
loss_clip: 0.013198 [  290/ 1000]
loss_clip: 0.0

In [15]:
model.get_model_accuracy_binary(test_clip_dino.features,test_clip_dino.label,device=device,binary_model=True)

0.891689121723175

### model used for submission vs fine-tuned vs clip + Dino

In [32]:
n = 500
taskA_train_data = SimpleDataset(test_data_clip.features[:n],test_data_clip.label[:n])
taskA_test_data  = SimpleDataset(test_data_clip.features[n:],test_data_clip.label[n:])

taskA_train_data_dino = SimpleDataset(test_data_dino.features[:n],test_data_dino.label[:n])
taskA_test_data_dino  = SimpleDataset(test_data_dino.features[n:],test_data_dino.label[n:])

In [28]:
model_sub = MultiClassClassifier(n_classes=2).to(device)
model_sub.load_state_dict(torch.load("../model/checkpoints/binary_train_real_fake_2k_fine_tune_meta_test.pt"))
model_sub.eval()
model_sub.get_model_accuracy_binary(features=taskA_test_data.features,
                                    true_labels=taskA_test_data.label,
                                    device=device,
                                    binary_model=True)

0.8865344524383545

In [29]:
model_fine_tuned = MultiClassClassifier(n_classes=2).to(device)
model_fine_tuned.load_state_dict(torch.load("../model/checkpoints/binary_train_real_fake_2k_fine_tune_meta_test.pt"))
model.train()

train_loader = DataLoader(taskA_train_data,batch_size)

lr = 1e-3
batch_size = 64
n_epochs = 200

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_fine_tuned.parameters(), lr=lr)
rng = torch.Generator().manual_seed(SEED)

for epoch in range(1,n_epochs+1):
    for idx, batch in enumerate(train_loader):
        # prediction and loss
        pred = model_fine_tuned((batch["features"]).to(device))
        loss = loss_fn(pred,batch["label"].type(torch.LongTensor).to(device))

        # backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
    loss, current = loss.item(), idx*batch_size + len(batch["features"])
    if epoch%10 == 0 and epoch > 0:
        print(f"loss: {loss:>7f}  [{epoch:>5d}/{n_epochs:>5d}]")

loss: 0.147629  [   10/  200]
loss: 0.117267  [   20/  200]
loss: 0.093667  [   30/  200]
loss: 0.076725  [   40/  200]
loss: 0.064605  [   50/  200]
loss: 0.055294  [   60/  200]
loss: 0.047925  [   70/  200]
loss: 0.042112  [   80/  200]
loss: 0.037449  [   90/  200]
loss: 0.033690  [  100/  200]
loss: 0.030580  [  110/  200]
loss: 0.028099  [  120/  200]
loss: 0.026055  [  130/  200]
loss: 0.024351  [  140/  200]
loss: 0.022886  [  150/  200]
loss: 0.021621  [  160/  200]
loss: 0.020550  [  170/  200]
loss: 0.019594  [  180/  200]
loss: 0.018729  [  190/  200]
loss: 0.017950  [  200/  200]


In [30]:
model_fine_tuned.get_model_accuracy_binary(features=taskA_test_data.features,
                                           true_labels=taskA_test_data.label,
                                           device=device,
                                           binary_model=True)

0.9453027248382568

In [35]:
clip_dino_train = SimpleDataset(torch.cat((taskA_train_data.features.cpu(),taskA_train_data_dino.features.cpu()),dim=1),
                                taskA_train_data.label)
clip_dino_test = SimpleDataset(torch.cat((taskA_test_data.features.cpu(),taskA_test_data_dino.features.cpu()),dim=1),
                                taskA_test_data.label)

In [37]:
model_clip_dino = MultiClassClassifier(n_features=CLIP_FEATURE_DIM+DINO_FEATURE_DIM,n_classes=2).to(device)
model_clip_dino.load_state_dict(torch.load("../model/checkpoints/binary_train_real_fake_2k_fine_tune_meta_test.pt"))
model_clip_dino.train()


lr = 1e-3
batch_size = 64
n_epochs = 200

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_clip_dino.parameters(), lr=lr)
rng = torch.Generator().manual_seed(SEED)

train_loader = DataLoader(clip_dino_train,batch_size=batch_size,generator=rng, shuffle=True)

for epoch in range(1,n_epochs+1):
    for idx, batch in enumerate(train_loader):
        # prediction and loss
        pred = model_clip_dino((batch["features"]).to(device))
        loss = loss_fn(pred,batch["label"].type(torch.LongTensor).to(device))

        # backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
    loss, current = loss.item(), idx*batch_size + len(batch["features"])
    if epoch%10 == 0 and epoch > 0:
        print(f"loss: {loss:>7f}  [{epoch:>5d}/{n_epochs:>5d}]")

RuntimeError: Error(s) in loading state_dict for MultiClassClassifier:
	size mismatch for fc1.weight: copying a param with shape torch.Size([512, 768]) from checkpoint, the shape in current model is torch.Size([512, 1536]).