In [1]:
import logging
import math
import sys
from abc import abstractmethod
from typing import Dict, Optional, Tuple

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from efficientnet_pytorch import EfficientNet
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchsummary import summary
from torchvision import datasets, models, transforms
from torchvision.transforms import ToTensor
from torchvision.utils import save_image
from tqdm import tqdm


In [2]:
# AUTO = tf.data.experimental.AUTOTUNE
# IMAGE_SIZE = [512, 512]
# EPOCHS = 2000
# BATCH_SIZE_PER_TPU = 8
# BATCH_SIZE = BATCH_SIZE_PER_TPU * strategy.num_replicas_in_sync
# FOLDERNAME = "v2clean_sample"
NUM_CLASSES = 81313
EMB_SIZE = 512
EFNS = [
    EfficientNet.from_pretrained("efficientnet-b0", num_classes=EMB_SIZE),
    EfficientNet.from_pretrained("efficientnet-b1"),
    EfficientNet.from_pretrained("efficientnet-b2"),
    EfficientNet.from_pretrained("efficientnet-b3"),
    EfficientNet.from_pretrained("efficientnet-b4"),
    EfficientNet.from_pretrained("efficientnet-b5"),
    EfficientNet.from_pretrained("efficientnet-b6"),
    EfficientNet.from_pretrained("efficientnet-b7", num_classes=EMB_SIZE),
]
EFF_VER = 0


Loaded pretrained weights for efficientnet-b0
Loaded pretrained weights for efficientnet-b1
Loaded pretrained weights for efficientnet-b2
Loaded pretrained weights for efficientnet-b3
Loaded pretrained weights for efficientnet-b4
Loaded pretrained weights for efficientnet-b5
Loaded pretrained weights for efficientnet-b6
Loaded pretrained weights for efficientnet-b7


In [3]:
summary(EFNS[EFF_VER], (3, 512, 512))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         ZeroPad2d-1          [-1, 3, 513, 513]               0
Conv2dStaticSamePadding-2         [-1, 32, 256, 256]             864
       BatchNorm2d-3         [-1, 32, 256, 256]              64
MemoryEfficientSwish-4         [-1, 32, 256, 256]               0
         ZeroPad2d-5         [-1, 32, 258, 258]               0
Conv2dStaticSamePadding-6         [-1, 32, 256, 256]             288
       BatchNorm2d-7         [-1, 32, 256, 256]              64
MemoryEfficientSwish-8         [-1, 32, 256, 256]               0
          Identity-9             [-1, 32, 1, 1]               0
Conv2dStaticSamePadding-10              [-1, 8, 1, 1]             264
MemoryEfficientSwish-11              [-1, 8, 1, 1]               0
         Identity-12              [-1, 8, 1, 1]               0
Conv2dStaticSamePadding-13             [-1, 32, 1, 1]             288
         I

In [4]:
class AngularMarginHead(nn.Module):
    def __init__(
        self, feature_size: int, nrof_classes: int, s: int, m: float, clip: Optional[bool] = True
    ) -> None:
        super().__init__()
        self.s = s
        self.m = m
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.min_allowed = math.cos(math.pi - m)
        self.clip = clip

        self._cosine = None
        self.sine = None

        self.weight = nn.parameter.Parameter(torch.Tensor(nrof_classes, feature_size))
        nn.init.xavier_uniform_(self.weight)

    @property
    def cosine(self):
        return self._cosine

    @cosine.setter
    def cosine(self, value):
        self._cosine = value
        self.sine = torch.sqrt(1 - self.cosine ** 2)

    @abstractmethod
    def positive_cosine_similarity_modulator(self) -> torch.Tensor:
        """
        Positive cosine similarity modulator
        """

    @abstractmethod
    def negative_cosine_similarity_modulator(
        self, cosine_after_positive_modulator: torch.Tensor
    ) -> torch.Tensor:
        """
        Negative cosine similarity modulator
        """

    def forward(self, features: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        self.cosine = F.linear(F.normalize(features), F.normalize(self.weight))
        if self.clip:
            self.cosine = torch.clip(self.cosine, self.min_allowed, 0.99)

        one_hot = torch.zeros_like(self.cosine).to(y.device)
        one_hot.scatter_(1, y.view(-1, 1).long(), 1)

        cosine_after_positive_modulator = self.positive_cosine_similarity_modulator()
        cosine_after_negative_modulator = self.negative_cosine_similarity_modulator(
            cosine_after_positive_modulator
        )

        output = torch.where(
            one_hot == 1, cosine_after_positive_modulator, cosine_after_negative_modulator
        )
        return self.s * output


class ArcFace(AngularMarginHead):
    def __init__(
        self,
        feature_size: int,
        nrof_classes: int,
        s: Optional[int] = 64,
        m: Optional[float] = 0.5,
        clip: Optional[bool] = True,
    ) -> None:
        super().__init__(feature_size, nrof_classes, s, m, clip)

    def positive_cosine_similarity_modulator(self) -> torch.Tensor:
        """
        Positive cosine modulator for ArcFace is: T(cos(theta)) = cos(theta + m),
        if we expand this expression it becomes: cos(theta)*cos(m) - sin(theta)*sin(m)
        """
        return self.cosine * self.cos_m - self.sine * self.sin_m

    def negative_cosine_similarity_modulator(
        self, cosine_after_positive_modulator: torch.Tensor
    ) -> torch.Tensor:
        """
        There is no modulation for negative cosine similarity in ArcFace
        """
        return self.cosine


In [5]:
class EfficientNetBackbone(nn.Module):
    def __init__(self, feature_size: int):
        super(EfficientNetBackbone, self).__init__()

        self.efficientNet = EFNS[EFF_VER]

    def forward(self, x):
        return self.efficientNet(x)


In [6]:
def setup_logger():
    logger = logging.getLogger("train")
    logger.setLevel(logging.INFO)
    if len(logger.handlers) == 0:
        formatter = logging.Formatter("%(asctime)s | %(message)s")
        ch = logging.StreamHandler(stream=sys.stdout)
        ch.setFormatter(formatter)
        logger.addHandler(ch)
        fh = logging.handlers.WatchedFileHandler("train.log")
        fh.setFormatter(formatter)
        logger.addHandler(fh)
    return logger


logger = setup_logger()


class SummaryWriter:
    def __init__(self, nb_epochs: int, nb_batchs: int) -> None:
        self.nb_epochs = nb_epochs
        self.nb_batchs = nb_batchs
        self.epoch = 0

    def set_epoch(self, epoch: int) -> None:
        self.epoch = epoch

    def __call__(self, mode: str, i_batch: int, metrics: Dict[str, float]) -> None:
        summary = f"{mode.title()} Epoch {self.epoch}/{self.nb_epochs} | Batch {i_batch}/{self.nb_batchs} | "
        for metric_name, metric_value in metrics.items():
            summary += f"{metric_name.title()} {metric_value:.2f} | "
        logger.info(summary[:-2])


In [47]:
@torch.no_grad()
def calculate_accuracy(logits: torch.Tensor, y: torch.Tensor) -> float:
    y_pred = torch.argmax(logits, dim=1)
    return torch.mean((y_pred == y).float()).item()


def pass_epoch(
    loader: DataLoader,
    backbone: nn.Module,
    angular_margin: nn.Module,
    optimizer: optim.Optimizer,
    loss_fn: nn.Module,
    summary_writer: SummaryWriter,
    log_interval: int,
    device: str,
) -> Tuple[float]:

    loss = 0
    acc = 0
    with torch.set_grad_enabled(backbone.training):
        for i_batch, (x, y) in enumerate(loader):
            logging.info(i_batch)
            x = x.to(device)
            y = y.to(device)

            logging.info("Computing embeddings")
            embeddings = backbone(x)
            logits = angular_margin(embeddings, y)

            logging.info("Computing loss")
            print(logits.shape)
            print(y.shape)
            loss_batch = loss_fn(logits, y)
            acc_batch = calculate_accuracy(logits, y)

            logging.info("Updating weights")
            if backbone.training:
                optimizer.zero_grad()
                loss_batch.backward()
                optimizer.step()

            loss_batch = loss_batch.item()
            if i_batch % log_interval == 0:
                mode = "train" if backbone.training else "validation"
                summary_writer(mode, i_batch, {"loss": loss_batch, "acc": acc_batch})

            loss += loss_batch
            acc += acc_batch

    loss /= i_batch + 1
    acc /= i_batch + 1
    return loss, acc


def train(
    train_loader: DataLoader,
    validation_loader: DataLoader,
    angular_margin: nn.Module,
    loss_fn: nn.Module,
    feature_size: int,
    lr: float,
    nb_epochs: int,
    log_interval: int,
    early_stop_after: int,
    device: str,
) -> nn.Module:

    backbone = EfficientNetBackbone(feature_size)
    backbone.to(device)
    angular_margin.to(device)

    backbone_state_dict = backbone.state_dict()
    head_state_dict = angular_margin.state_dict()

    optimizer = optim.Adam(list(backbone.parameters()) + list(angular_margin.parameters()), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

    summary_writer = SummaryWriter(nb_epochs, len(train_loader))

    logging.info("Starting")

    min_loss = math.inf
    for epoch in range(nb_epochs):
        summary_writer.set_epoch(epoch + 1)

        backbone.train()
        angular_margin.train()

        logging.info("Passing epoch")
        loss, acc = pass_epoch(
            train_loader,
            backbone,
            angular_margin,
            optimizer,
            loss_fn,
            summary_writer,
            log_interval,
            device,
        )
        logger.info(f"Train Epoch Loss: {loss:.2f} | Accuracy: {acc:.2f}\n")

        backbone.eval()
        angular_margin.eval()
        loss, acc = pass_epoch(
            validation_loader,
            backbone,
            angular_margin,
            optimizer,
            loss_fn,
            summary_writer,
            log_interval,
            device,
        )
        acc = round(acc, 2)
        logger.info(f"Validation Epoch Loss: {loss:.2f} | Accuracy: {acc}\n")

        if loss > min_loss:
            min_loss = loss
            early_stop_counter = 0
            backbone_state_dict = backbone.state_dict()
            head_state_dict = angular_margin.state_dict()
        else:
            early_stop_counter += 1

        if early_stop_counter == early_stop_after:
            break

        scheduler.step(loss)

    backbone.load_state_dict(backbone_state_dict)
    angular_margin.load_state_dict(head_state_dict)
    return backbone, angular_margin, min_loss


@torch.no_grad()
def extract_embeddings(
    experiment_name: str, test_loader: DataLoader, backbone: nn.Module, device: str
) -> Tuple[np.ndarray]:
    logger.info(f"Extracting embeddings with the model with: {experiment_name}")
    log_interval = len(test_loader) // 5
    test_embeddings = []

    backbone.eval()
    for i_batch, x in enumerate(test_loader):
        x = x.to(device)

        test_embeddings.append(backbone(x))

        if i_batch % log_interval == 0:
            logger.info(f"Extracting embedings Batch {i_batch}/{len(test_loader)}")

    return torch.cat(test_embeddings).cpu().numpy()


In [8]:
# Head parameters
m = 0.5  # For all
s = 64  # For all
alpha = 0.99  # For CurricularFace
t = 1.2  # For MV-Arc-Softmax

nb_epochs = 50
batch_size = 256
lr = 5e-4
loss_fn_to_use = "softmax"
early_stop_after = 11
feature_size = 512  # Embeddings size
nb_classes = 81313  # The clean_train dataset has 81313 classes
log_interval = 50
device = "cuda" if torch.cuda.is_available() else "cpu"

transformations = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((512,512)),
        transforms.Normalize(
            mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
            std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
        ),
    ]
)

logger.info(f"Using device: {device}")


2022-01-17 00:52:08,544 | Using device: cpu


In [9]:
def get_path(folder, image_id):
    return os.path.join(folder, f"{image_id[0]}/{image_id[1]}/{image_id[2]}/{image_id}.jpg")


In [49]:
import os
import pandas as pd
from torch.utils.data import Dataset


class GoogleLandmarkDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        landmarks = sorted(list(set(self.img_labels["landmark_id"])))
        self.landmark_id_to_label = {landmarks[i]: i for i in range(len(landmarks))}
        self.img_dir = img_dir
        self.num_classes = len(landmarks)
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = get_path(self.img_dir, self.img_labels.iloc[idx, 0])
        image = cv.imread(img_path)
        label = self.landmark_id_to_label[self.img_labels.iloc[idx, 1]]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label


In [50]:
train_dataset = GoogleLandmarkDataset(
    img_dir="./data/train",
    annotations_file="./data/train.csv",
    transform=transformations,
)

train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size

nb_classes = train_dataset.num_classes

train_dataset, validation_dataset = random_split(train_dataset, [train_size, val_size])

# test_dataset = GoogleLandmarkDataset(
#     img_dir="../data/test", annotations_file="../data/test.csv", transform=transformations
# )

train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    pin_memory=True,
    num_workers=0,
    drop_last=True,
)

validation_loader = DataLoader(
    validation_dataset,
    batch_size=16,
    shuffle=False,
    pin_memory=True,
    num_workers=0,
    drop_last=False,
)

# test_loader = DataLoader(
#     test_dataset,
#     batch_size=batch_size,
#     shuffle=False,
#     pin_memory=True,
#     num_workers=16,
#     drop_last=False,
# )


In [32]:
train_features, train_labels = next(iter(train_loader))

In [34]:
train_labels.shape

torch.Size([16, 1, 349])

In [12]:
logging.basicConfig(level=logging.INFO)

In [51]:
backbone, head, acc = train(
    train_loader,
    validation_loader,
    ArcFace(feature_size, nb_classes, s, m),
    nn.CrossEntropyLoss(),
    feature_size,
    lr,
    10,
    1,
    early_stop_after,
    device,
)

plt.figure(num=None, figsize=(15, 15), dpi=80, facecolor="w", edgecolor="k")
plt.plot(head.ts)
plt.title("t values during training in CurricularFace")
plt.xlabel("step")
plt.ylabel("t")
logger.info(60 * "-" + "\n")


INFO:root:Starting
INFO:root:Passing epoch
INFO:root:0
INFO:root:Computing embeddings
INFO:root:Computing loss
INFO:root:Updating weights


torch.Size([16, 349])
torch.Size([16])
2022-01-17 01:22:50,503 | Train Epoch 1/10 | Batch 0/23 | Loss 39.86 | Acc 0.00 


INFO:train:Train Epoch 1/10 | Batch 0/23 | Loss 39.86 | Acc 0.00 
INFO:root:1
INFO:root:Computing embeddings
INFO:root:Computing loss
INFO:root:Updating weights


torch.Size([16, 349])
torch.Size([16])
2022-01-17 01:27:24,003 | Train Epoch 1/10 | Batch 1/23 | Loss 42.75 | Acc 0.00 


INFO:train:Train Epoch 1/10 | Batch 1/23 | Loss 42.75 | Acc 0.00 
INFO:root:2
INFO:root:Computing embeddings
INFO:root:Computing loss
INFO:root:Updating weights


torch.Size([16, 349])
torch.Size([16])


KeyboardInterrupt: 

In [1]:
from src.data.download import download_and_sort
import logging

logging.basicConfig(level = logging.INFO)
    
download_and_sort(begin=0, end=4, nb_landmarks=50000)
    

INFO:root:Downloading images from Google Landmark Dataset


Downloading images_000.tar and its md5sum...
Downloading images_001.tar and its md5sum...
Downloading images_002.tar and its md5sum...
Downloading images_003.tar and its md5sum...
Downloading images_004.tar and its md5sum...
images_001.tar extracted!
images_004.tar extracted!
images_003.tar extracted!
images_000.tar extracted!


INFO:root:Transfering downloaded images


images_002.tar extracted!
