In [None]:
import torch
from torch import nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from tqdm import tqdm
import timm
import matplotlib.pyplot as plt

In [None]:
# Folder prefix that is concatenated directly with each image file name,
# so the trailing slash is significant.
# NOTE(review): this starts with ".data/" while TRAIN_CSV starts with "./data/" —
# confirm the hidden-directory style path is intended.
TRAIN_DATASET = ".data/train/"
# Annotation table (read as tab-separated further down).
TRAIN_CSV = "./data/private_info/train.csv"
# Side length, in pixels, of the square model input.
IMG_SIZE = 448
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
def set_requires_grad(model, value=False):
    """Set ``requires_grad`` to ``value`` on every parameter of ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        value: Flag assigned to each parameter; ``False`` (the default)
            freezes the model, ``True`` makes it trainable again.
    """
    for parameter in model.parameters():
        parameter.requires_grad = value

Dataframes preprocessing

In [None]:
# Load the tab-separated annotation table (one row per training image).
# NOTE(review): the head() output shows an "Unnamed: 0" column, so the file seems to
# contain a saved index; consider index_col=0 if that column is not needed.
train_df = pd.read_csv(TRAIN_CSV, sep="\t")

Dataset class

In [None]:
class ArtDataset(Dataset):
    """Image dataset driven by a dataframe of file names and optional labels.

    Args:
        df: Frame with an ``image_name`` column and, when ``train`` is true,
            a ``label_id`` column. Its index may be arbitrary (e.g. a filtered
            or split frame); samples are addressed by position.
        transform: Optional callable applied to the RGB PIL image.
        train: When true, ``__getitem__`` returns ``(image, label)``;
            otherwise it returns only the image.
    """

    def __init__(self, df: pd.DataFrame, transform=None, train=True):
        self.transform = transform
        self.train = train
        # Full paths: the folder prefix is prepended to every file name.
        self.file_names = TRAIN_DATASET + df["image_name"]
        if train:
            self.labels = df["label_id"]

    def __len__(self):
        return len(self.file_names)

    def __getitem__(self, idx: int):
        """Return the sample at position ``idx`` (0 <= idx < len(self))."""
        # Positional lookup: samplers yield 0..len-1, which matches the Series
        # labels only when the frame still has its default RangeIndex.
        with Image.open(self.file_names.iloc[idx]) as img:
            # convert() loads the pixels into a new image, so the underlying
            # file can be closed as soon as the block exits.
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        # Callers in this notebook feed batches straight into the model, so
        # tensors are moved to `device` here. Without a transform the sample is
        # still a PIL image, which has no `.to`, and is returned unchanged.
        # NOTE(review): moving to a CUDA device inside the dataset generally
        # requires the DataLoader to stay at num_workers=0.
        if isinstance(image, torch.Tensor):
            image = image.to(device)

        if self.train:
            return image, self.labels.iloc[idx]
        return image

In [None]:
# Peek at the first rows to check which columns the annotation table provides.
train_df.head()

Unnamed: 0,image_name,label_id
0,af49360b0b294346b3dbf70602d56e37.jpg,0
1,009ccc6503584cabbdff7b98a2bc68e1.jpeg,0
2,a83a2120a2d945ce82175e0df44fb77b.jpg,0
3,8a71564f915143969042c0e190b0b47b.jpg,0
4,e1eaf20e92f043409052f47b3ce60cf5.jpg,0


Init model

In [None]:
# Pretrained EVA02 backbone. num_classes=0 requests the model without a
# classification head (per timm's feature-extraction convention), so its
# outputs are used as embeddings later in the notebook.
model = timm.create_model(
    'eva02_base_patch14_448.mim_in22k_ft_in1k',
    pretrained=True,
    num_classes=0,
)
# Switch to inference mode; as the last expression this also displays the module.
model.eval()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/348M [00:00<?, ?B/s]

In [None]:
# Freeze every backbone parameter: the model is only used for inference here.
set_requires_grad(model, False)

In [None]:
# Place the model on the device selected in the configuration cell.
model = model.to(device)

Get embeddings

In [None]:
# Ask timm for the preprocessing settings associated with this model.
data_config = timm.data.resolve_model_data_config(model)

In [None]:
# Display the resolved settings (input size, interpolation, mean/std, crop).
data_config

{'input_size': (3, 448, 448),
 'interpolation': 'bicubic',
 'mean': (0.48145466, 0.4578275, 0.40821073),
 'std': (0.26862954, 0.26130258, 0.27577711),
 'crop_pct': 1.0,
 'crop_mode': 'center'}

In [None]:
from torchvision.transforms.functional import InterpolationMode

In [None]:
# Baseline preprocessing (no augmentation).
trans = transforms.Compose([
    # Squash to the square model input. torchvision's Resize defaults to
    # bilinear, whereas data_config lists bicubic for this checkpoint, so the
    # interpolation is set explicitly.
    transforms.Resize((IMG_SIZE, IMG_SIZE), interpolation=InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    # Normalisation statistics come from data_config instead of duplicated literals.
    transforms.Normalize(data_config["mean"], data_config["std"]),
])

In [None]:
# Augmented preprocessing: random rotation of up to 11 degrees either way.
trans_rotate = transforms.Compose([
    transforms.RandomRotation(degrees=(-11, 11), interpolation=InterpolationMode.BILINEAR),
    # Squash to the square model input. torchvision's Resize defaults to
    # bilinear, whereas data_config lists bicubic for this checkpoint, so the
    # interpolation is set explicitly.
    transforms.Resize((IMG_SIZE, IMG_SIZE), interpolation=InterpolationMode.BICUBIC),
    transforms.ToTensor(),
    # Normalisation statistics come from data_config instead of duplicated literals.
    transforms.Normalize(data_config["mean"], data_config["std"]),
])

In [None]:
# Augmented preprocessing: every image is mirrored horizontally (p=1).
trans_flipped = transforms.Compose([
    # Squash to the square model input. torchvision's Resize defaults to
    # bilinear, whereas data_config lists bicubic for this checkpoint, so the
    # interpolation is set explicitly.
    transforms.Resize((IMG_SIZE, IMG_SIZE), interpolation=InterpolationMode.BICUBIC),
    transforms.RandomHorizontalFlip(p=1),
    transforms.ToTensor(),
    # Normalisation statistics come from data_config instead of duplicated literals.
    transforms.Normalize(data_config["mean"], data_config["std"]),
])

In [None]:
# Augmented preprocessing: perspective warp applied to every image (p=1).
# NOTE(review): unlike the other pipelines there is no Resize here, so the
# 448x448 centre crop is taken from the image at its original resolution —
# confirm that this is intended.
trans_perspective = transforms.Compose([
    transforms.RandomPerspective(distortion_scale=0.4, p=1),
    transforms.CenterCrop(size=(448, 448)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.48145466, 0.4578275, 0.40821073],
        std=[0.26862954, 0.26130258, 0.27577711],
    ),
])

In [None]:
# One loader per preprocessing variant. train=False makes the dataset yield
# images only, and shuffle=False keeps batches in dataframe order so the
# concatenated embeddings line up with the rows of train_df.
dataset = DataLoader(
    ArtDataset(train_df, transform=trans, train=False),
    batch_size=256,
    shuffle=False,
)
dataset_rotated = DataLoader(
    ArtDataset(train_df, transform=trans_rotate, train=False),
    batch_size=256,
    shuffle=False,
)
dataset_flipped = DataLoader(
    ArtDataset(train_df, transform=trans_flipped, train=False),
    batch_size=256,
    shuffle=False,
)
dataset_perspective = DataLoader(
    ArtDataset(train_df, transform=trans_perspective, train=False),
    batch_size=256,
    shuffle=False,
)

In [None]:
# Embed every image with the baseline transform, one batch at a time.
results = []
with torch.no_grad():
    for batch in tqdm(dataset):
        # The dataset already returns tensors on `device`; the features are
        # copied back to host memory as NumPy arrays.
        results.append(model(batch).cpu().numpy())

# Rows follow train_df order because the loader does not shuffle.
res_df = pd.DataFrame(
    {
        "image_name": train_df["image_name"],
        "embeddings": np.concatenate(results).tolist(),
        "label_id": train_df["label_id"],
    }
)

res_df.to_csv("embeddings.csv", index=False)

100%|██████████| 34/34 [55:48<00:00, 98.48s/it]


In [None]:
# The rotation angles are sampled at random; seeding torch's global RNG first
# makes the saved embeddings reproducible when the cell is re-run.
torch.manual_seed(42)

# Embed every image with the random-rotation transform, one batch at a time.
results = []
with torch.no_grad():
    for batch in tqdm(dataset_rotated):
        # The dataset already returns tensors on `device`; the features are
        # copied back to host memory as NumPy arrays.
        results.append(model(batch).cpu().numpy())

# Rows follow train_df order because the loader does not shuffle.
res_df = pd.DataFrame(
    {
        "image_name": train_df["image_name"],
        "embeddings": np.concatenate(results).tolist(),
        "label_id": train_df["label_id"],
    }
)

res_df.to_csv("embeddings_rotated.csv", index=False)

100%|██████████| 34/34 [19:37<00:00, 34.63s/it]


In [None]:
# Embed every image with the horizontal-flip transform, one batch at a time.
results = []
with torch.no_grad():
    for batch in tqdm(dataset_flipped):
        # The dataset already returns tensors on `device`; the features are
        # copied back to host memory as NumPy arrays.
        results.append(model(batch).cpu().numpy())

# Rows follow train_df order because the loader does not shuffle.
res_df = pd.DataFrame(
    {
        "image_name": train_df["image_name"],
        "embeddings": np.concatenate(results).tolist(),
        "label_id": train_df["label_id"],
    }
)

res_df.to_csv("embeddings_flipped.csv", index=False)

100%|██████████| 34/34 [14:20<00:00, 25.32s/it]


In [None]:
# The perspective warp is sampled at random; seeding torch's global RNG first
# makes the saved embeddings reproducible when the cell is re-run.
torch.manual_seed(42)

# Embed every image with the random-perspective transform, one batch at a time.
results = []
with torch.no_grad():
    for batch in tqdm(dataset_perspective):
        # The dataset already returns tensors on `device`; the features are
        # copied back to host memory as NumPy arrays.
        results.append(model(batch).cpu().numpy())

# Rows follow train_df order because the loader does not shuffle.
res_df = pd.DataFrame(
    {
        "image_name": train_df["image_name"],
        "embeddings": np.concatenate(results).tolist(),
        "label_id": train_df["label_id"],
    }
)

res_df.to_csv("embeddings_perspective.csv", index=False)

100%|██████████| 34/34 [51:10<00:00, 90.32s/it]
