In [1]:
# Shell escape: print the kernel's current working directory.
# NOTE(review): the bare project imports in the following cells (`run`, `models`,
# `datasets`, `utils`) presumably resolve relative to this directory — confirm.
!pwd

/root/class/cmu/DL/cmu-hw2p2


In [15]:
from run import train, test, inference, face_embedding, verification_inference, gen_cls_submission, gen_ver_submission

In [4]:
from copy import deepcopy
import os
from tqdm import tqdm
from glob import glob

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torch.optim as optim
import torchvision
import torchvision.transforms as ttf
from torch.cuda.amp import GradScaler, autocast
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import accuracy_score, roc_auc_score
import hydra
from omegaconf import OmegaConf
import wandb

from models.cnn import BaselineCNN, VGG16
from datasets.classification import ClassificationTestSet
from datasets.verification import VerificationDataset
from datasets.transform import AlbumTransforms, train_transforms, val_transforms
from utils.utils import weight_decay_custom, compute_kl_loss, SAM

In [6]:
# ---------------------------------------------------------------------------
# Dataset locations.
# NOTE(review): these are absolute paths on a shared machine; anyone running
# the notebook elsewhere has to edit BASE_DIR.
# ---------------------------------------------------------------------------
BASE_DIR = '/shared/youngkim/hw2p2'
CLS_DIR = os.path.join(BASE_DIR, '11-785-s22-hw2p2-classification')
VER_DIR = os.path.join(BASE_DIR, '11-785-s22-hw2p2-verification')

# Classification splits. The train path points at the smaller subset of the
# data; switch it to "classification/classification/train" for the full set.
CLS_TRAIN_DIR = os.path.join(CLS_DIR, "train_subset/train_subset")
CLS_VAL_DIR = os.path.join(CLS_DIR, "classification/classification/dev")
CLS_TEST_DIR = os.path.join(CLS_DIR, "classification/classification/test")

# Verification splits.
VER_VAL_DIR = os.path.join(VER_DIR, 'verification/verification/dev')
VER_TEST_DIR = os.path.join(VER_DIR, 'verification/verification/test')

# Both verification datasets use the validation-time transforms; each one
# gets its own AlbumTransforms wrapper instance.
val_veri_dataset = VerificationDataset(VER_VAL_DIR, AlbumTransforms(val_transforms))
test_veri_dataset = VerificationDataset(VER_TEST_DIR, AlbumTransforms(val_transforms))

# Identical loader settings for dev and test: fixed order (shuffle=False),
# batches of 128, a single worker process.
ver_loader_kwargs = dict(batch_size=128, shuffle=False, num_workers=1)
val_ver_loader = DataLoader(val_veri_dataset, **ver_loader_kwargs)
test_ver_loader = DataLoader(test_veri_dataset, **ver_loader_kwargs)

In [8]:
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# Instantiate the network and move its parameters onto the chosen device.
model = VGG16().to(device)

cuda


In [9]:
# Restore the trained weights into `model`.
CKPT_PATH = '/shared/youngkim/hw2p2/weights/vgg16.pth'

# map_location remaps the stored tensors onto whatever `device` was selected,
# so a checkpoint saved from a GPU run still loads on a CPU-only machine.
checkpoint = torch.load(CKPT_PATH, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# This notebook only runs inference; eval mode makes any dropout / batch-norm
# layers deterministic. Idempotent if downstream helpers also call it.
model.eval()
print(f"Model loaded: {CKPT_PATH}")

Model loaded: /shared/youngkim/hw2p2/weights/vgg16.pth


In [None]:
# CSV listing the labelled dev image pairs, and the metric used to compare two
# embeddings (dim=0 because each embedding is a single 1-D vector).
val_veri_csv = os.path.join(VER_DIR, "verification/verification/verification_dev.csv")
similarity_metric = nn.CosineSimilarity(dim=0)

# Embed every dev image with the restored network. The result is a mapping
# that the following cells index by image file name.
best_model = model
val_feats_dict = face_embedding(best_model, val_ver_loader, device)


In [19]:
# Peek at the first key of the embedding dict (an image file name).
next(iter(val_feats_dict.items()))[0]

'000b28b024.jpg'

In [14]:
# Shape of the first stored embedding.
next(iter(val_feats_dict.items()))[1].shape

torch.Size([512])

In [17]:
# Preview the first five rows of the dev pair list.
pd.read_csv(val_veri_csv).head(n=5)

Unnamed: 0,image_A,image_B,match
0,dev/ab001b21a1.jpg,dev/10246770ce.jpg,1
1,dev/c692b5fa6b.jpg,dev/299becf799.jpg,0
2,dev/d0dc5318e4.jpg,dev/3aac902136.jpg,0
3,dev/f9643ca7b5.jpg,dev/6f300f3205.jpg,1
4,dev/95ccb4131a.jpg,dev/3d207ca2b2.jpg,0


In [25]:
# Score every labelled dev pair with cosine similarity and report ROC-AUC.
# Each embedding is a single 1-D vector, hence dim=0.
similarity_metric = nn.CosineSimilarity(dim=0)
similarity_metric.to(device)

# Read the pair list inside a context manager so the file handle is closed.
with open(val_veri_csv) as pair_file:
    pair_lines = pair_file.read().splitlines()[1:]  # skip header

pred_similarities = []
gt_similarities = []
for line in tqdm(pair_lines, position=0, leave=True, desc='Veri'):
    if not line.strip():
        continue  # tolerate stray blank lines instead of failing the unpack
    img_path1, img_path2, gt = line.split(",")

    # The CSV stores paths such as "dev/<name>.jpg", while the embedding dict
    # is keyed by the bare file name, so keep only the last path component.
    feat1 = val_feats_dict[img_path1.split("/")[-1]]
    feat2 = val_feats_dict[img_path2.split("/")[-1]]

    similarity = similarity_metric(feat1, feat2)

    # .item() yields a plain Python float, so the list converts to a numeric
    # array without going through 0-d tensors.
    pred_similarities.append(similarity.item())
    gt_similarities.append(int(gt))

pred_similarities = np.array(pred_similarities)
gt_similarities = np.array(gt_similarities)

auc = roc_auc_score(gt_similarities, pred_similarities)
print("AUC:", auc)

Veri: 100%|██████████| 166800/166800 [00:12<00:00, 12946.29it/s]


AUC: 0.8937404067355124


In [26]:
# Embed the test images, score every test pair, and write the submission CSV.
test_feats_dict = face_embedding(best_model, test_ver_loader, device)

test_veri_csv = os.path.join(VER_DIR, "verification/verification/verification_test.csv")
pred_similarities = verification_inference(test_veri_csv, test_feats_dict, similarity_metric, device)

# Sanity check before writing: the submission must contain one score per pair.
EXPECTED_NUM_TEST_PAIRS = 667600
assert len(pred_similarities) == EXPECTED_NUM_TEST_PAIRS, (
    f"expected {EXPECTED_NUM_TEST_PAIRS} similarities, got {len(pred_similarities)}"
)

sub_path = '/shared/youngkim/hw2p2/submissions'
save_name = 'vgg16'
# Create the output folder on first use so to_csv does not fail on a fresh machine.
os.makedirs(sub_path, exist_ok=True)

# Row ids are the 0-based positions of the pairs in the order they were scored.
test_names = range(len(pred_similarities))
submission = pd.DataFrame(zip(test_names, pred_similarities), columns=['id', 'match'])
submission.to_csv(os.path.join(sub_path, f'{save_name}_ver_sub.csv'), index=False)

print("ver_submission saved.")

Embedding: 100%|██████████| 188/188 [00:45<00:00,  4.10it/s]
Veri_infer: 100%|██████████| 667600/667600 [00:52<00:00, 12762.08it/s]


ver_submission saved.
