In [1]:
import os

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

from sklearn.metrics.pairwise import cosine_similarity

import seaborn as sns

import PIL
from PIL import Image

import cv2
import time

import random

#from deepface import DeepFace
sns.set(style="white")
import time

In [2]:
#настроим random_seed всего
seed_num = 13
torch.manual_seed(seed_num)
torch.cuda.manual_seed(seed_num)
torch.cuda.manual_seed_all(seed_num)
torch.use_deterministic_algorithms(True) # do not work in collab
np.random.seed(seed_num)

import torch.backends.cudnn
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True # do not work in collab

In [3]:
project_path = r'F:\Codenrock\Kryptonite'
image_path = r'F:\Codenrock\Kryptonite\data\train\images'

##### Try DinoV2

In [5]:
import torch

# DINOv2
dinov2_vits14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
dinov2_vits14

Using cache found in C:\Users\serge/.cache\torch\hub\facebookresearch_dinov2_main


DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0-11): 12 x NestedTensorBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (drop_path2): Identity()
    )
  )
  (n

In [10]:
import torchvision.transforms as transforms

image_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

In [11]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [14]:
dinov2_vits14.to(device)
dinov2_vits14.eval()

DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0-11): 12 x NestedTensorBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (drop_path2): Identity()
    )
  )
  (n

In [16]:
# Function to extract features from an image
def extract_features(image):
    image_tensor = image_transforms(image).unsqueeze(0).to(device)
    with torch.no_grad():
        features = dinov2_vits14(image_tensor).float()
    return features

In [195]:
image = Image.open(os.path.join(image_path, '000000', '0.jpg'))
features = extract_features(image)
len(features[0])

384

In [197]:
image = Image.open(os.path.join(image_path, '000000', '3.jpg'))
features_3 = extract_features(image)
len(features_3[0])

384

In [201]:
from sklearn.metrics.pairwise import cosine_similarity

cosine_similarity(features[0].cpu().detach().reshape(1, -1), features_3[0].cpu().detach().reshape(1, -1))

array([[0.38299292]], dtype=float32)

In [203]:
image = Image.open(os.path.join(image_path, '000000', '1.jpg'))
features_1 = extract_features(image)
len(features_1[0])

384

In [205]:
cosine_similarity(features[0].cpu().detach().reshape(1, -1), features_1[0].cpu().detach().reshape(1, -1))

array([[0.5326899]], dtype=float32)

In [207]:
image = Image.open(os.path.join(image_path, '000001', '1.jpg'))
features_1_1 = extract_features(image)
len(features_1_1[0])

384

In [209]:
cosine_similarity(features[0].cpu().detach().reshape(1, -1), features_1_1[0].cpu().detach().reshape(1, -1))

array([[0.32376397]], dtype=float32)

### Check on train

In [11]:
train_list = os.listdir(image_path)
train_list[5]

'000005'

In [250]:
dino_res = [] 
start_time = time.time() 
for i in range(100):
    folder = str(train_list[i])
    for img in os.listdir(os.path.join(image_path, folder)):   
        image = Image.open(os.path.join(image_path, folder, img))
        features = extract_features(image)
        dino_res.append([folder, img, features.cpu().detach().numpy()[0]])

end_time = time.time()
print(end_time - start_time)

14.49461555480957


In [251]:
dino_res[0]

['000000',
 '0.jpg',
 array([ 4.06040251e-01, -2.82562107e-01,  2.72536683e+00, -3.19815755e-01,
         2.44818258e+00,  2.25224137e+00, -7.70664990e-01, -7.72158325e-01,
        -3.84756267e-01, -1.52134073e+00,  3.08043694e+00, -3.01087439e-01,
         1.06554532e+00,  1.82380307e+00,  1.74709022e-01,  7.59059563e-02,
        -3.81554365e-01,  3.59395194e+00, -6.66481435e-01,  3.21309757e+00,
         1.83832240e+00, -7.29189932e-01, -4.29921436e+00,  2.24033054e-02,
        -2.57733274e+00,  2.24134707e+00,  3.54285419e-01, -1.80264699e+00,
         9.48821425e-01, -3.20933390e+00, -1.21562850e+00, -9.29967284e-01,
        -3.24352312e+00, -1.28124022e+00, -2.14545822e+00,  4.92366123e+00,
        -3.77683163e+00, -5.06840527e-01, -3.84781301e-01,  2.58617687e+00,
        -1.82457304e+00, -1.83615375e+00, -5.14317334e-01, -4.72749376e+00,
        -1.87857556e+00,  3.79930049e-01,  6.00710034e-01,  9.59362447e-01,
        -2.34063077e+00, -1.70901990e+00, -4.28695536e+00, -3.50667

In [252]:
dino_table = pd.DataFrame(dino_res, columns = ['folder', 'img', 'dino_res'])
dino_table

Unnamed: 0,folder,img,dino_res
0,000000,0.jpg,"[0.40604025, -0.2825621, 2.7253668, -0.3198157..."
1,000000,1.jpg,"[-0.4514499, -1.5732969, 1.5072427, -1.1402816..."
2,000000,2.jpg,"[0.5290648, -1.6426156, 4.183897, 4.511059, 0...."
3,000000,3.jpg,"[3.0183055, -2.3972485, 4.8457127, 0.72337306,..."
4,000000,4.jpg,"[-3.7036185, 0.21727978, 3.4040785, 1.3180584,..."
...,...,...,...
977,000099,5.jpg,"[3.5311913, -0.66300964, 4.5454984, 3.7563012,..."
978,000099,6.jpg,"[4.8436356, -0.5722234, 4.9047365, 2.6402385, ..."
979,000099,7.jpg,"[2.2608945, 0.34873235, 0.63394886, -3.5838182..."
980,000099,8.jpg,"[3.3726974, -1.9818227, 4.285077, 3.7897644, 2..."


In [22]:
# open real/fake labels
with open(os.path.join(project_path, 'data', 'train', 'meta.json'), 'r') as myfile:
    data = myfile.read()

# parse file
meta = json.loads(data)

In [280]:
dino_table_np = np.array(dino_table)

In [282]:
emb_cos_sims = []

for i in range(len(dino_table_np)):
    img_1 = dino_table_np[i]
    img_1_label = meta[img_1[0] + '/' + img_1[1]]
    for j in range(len(dino_table_np)):
        img_2 = dino_table_np[j]
        img_2_label = meta[img_2[0] + '/' + img_2[1]]
        dino_cos_sim = cosine_similarity(img_1[2].reshape(1, -1), img_2[2].reshape(1, -1))

        if img_1[0] == img_2[0]:
            id_label = 'same'
        else:
            id_label = 'other'
        emb_cos_sims.append([img_1[0], img_1[1], img_1_label, img_2[0], img_2[1], img_2_label, id_label, dino_cos_sim[0][0]])

In [283]:
cos_sim_df = pd.DataFrame(emb_cos_sims, columns = ['folder_1', 'img_1', 'label_1', 'folder_2', 'img_2', 'label_2', 'id', 'dino_cos_sim'])
cos_sim_df

Unnamed: 0,folder_1,img_1,label_1,folder_2,img_2,label_2,id,dino_cos_sim
0,000000,0.jpg,0,000000,0.jpg,0,same,1.000000
1,000000,0.jpg,0,000000,1.jpg,0,same,0.532690
2,000000,0.jpg,0,000000,2.jpg,1,same,0.328386
3,000000,0.jpg,0,000000,3.jpg,1,same,0.382993
4,000000,0.jpg,0,000000,4.jpg,0,same,0.674528
...,...,...,...,...,...,...,...,...
964319,000099,9.jpg,1,000099,5.jpg,1,same,0.394137
964320,000099,9.jpg,1,000099,6.jpg,1,same,0.593866
964321,000099,9.jpg,1,000099,7.jpg,0,same,0.037962
964322,000099,9.jpg,1,000099,8.jpg,1,same,0.530302


In [288]:
print('Same person: ', cos_sim_df.loc[(cos_sim_df['label_1'] == 0) & (cos_sim_df['label_2'] == 0) & (cos_sim_df['id'] == 'same')]['dino_cos_sim'].mean())
print('Different person: ', cos_sim_df.loc[(cos_sim_df['label_1'] == 0) & (cos_sim_df['label_2'] == 0) & (cos_sim_df['id'] == 'other')]['dino_cos_sim'].mean())
print('Real vs Fake: ', cos_sim_df.loc[(cos_sim_df['label_1'] == 0) & (cos_sim_df['label_2'] == 1) & (cos_sim_df['id'] == 'same')]['dino_cos_sim'].mean())

Same person:  0.59913665
Different person:  0.30194122
Real vs Fake:  0.43516958


##### Works faster, than previous models. Let's check quality on public_test submit

### Submit

In [290]:
ssd_path = r'F:\Codenrock\Kryptonite\data\test_public\images'
ssd_list_dir = os.listdir(ssd_path)
ssd_list_dir[0]

'00000000'

In [314]:
cos_sims = []
start_time = time.time()
beg_time = start_time
for folder in ssd_list_dir:    
    img_path = os.path.join(ssd_path, folder)
    image_1 = Image.open(os.path.join(img_path, "0.jpg"))
    features_1 = extract_features(image_1)
    image_2 = Image.open(os.path.join(img_path, "1.jpg"))
    features_2 = extract_features(image_2)  
    sim_scores = cosine_similarity(features_1.cpu().detach().reshape(1, -1), features_2.cpu().detach().reshape(1, -1))
    cos_sims.append([folder, sim_scores[0][0]])
    if int(folder) % 1000 == 0:
        print(f'Current time: {folder} ', time.time() - beg_time)  
        beg_time = time.time()

end_time = time.time()
print('Inference time: ', end_time - start_time)

Current time: 00000000  0.279585599899292
Current time: 00001000  38.48548436164856
Current time: 00002000  38.366517066955566
Current time: 00003000  38.90357327461243
Current time: 00004000  39.366355657577515
Current time: 00005000  38.773035764694214
Current time: 00006000  39.41748666763306
Current time: 00007000  39.48129391670227
Current time: 00008000  38.89866518974304
Current time: 00009000  39.34936261177063
Current time: 00010000  39.401777029037476
Current time: 00011000  39.64877891540527
Current time: 00012000  40.166284799575806
Current time: 00013000  39.84890556335449
Current time: 00014000  40.83604693412781
Current time: 00015000  40.97736191749573
Current time: 00016000  40.41634654998779
Current time: 00017000  41.818305015563965
Current time: 00018000  40.96173334121704
Current time: 00019000  41.24811673164368
Current time: 00020000  42.48484539985657
Current time: 00021000  42.301769495010376
Current time: 00022000  42.152456521987915
Current time: 00023000  41

In [315]:
preds = pd.DataFrame(cos_sims, columns = ['pair_id', 'similarity'])
preds

Unnamed: 0,pair_id,similarity
0,00000000,1.000000
1,00000001,0.463270
2,00000002,0.628494
3,00000003,0.703372
4,00000004,0.617622
...,...,...
161186,00161186,0.612823
161187,00161187,0.729216
161188,00161188,0.295253
161189,00161189,0.547375


In [316]:
preds.to_csv('submit_v2.csv', index = False)

In [317]:
preds.loc[preds['similarity']<0.5]

Unnamed: 0,pair_id,similarity
1,00000001,0.463270
5,00000005,0.394853
8,00000008,0.451286
12,00000012,0.370295
13,00000013,0.303933
...,...,...
161180,00161180,0.376577
161181,00161181,0.370270
161183,00161183,0.101965
161188,00161188,0.295253


Результат: 0.3577