In [None]:
import pandas as pd
import os
import random

In [None]:
from torchvision.models import resnet50

In [None]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from skimage.transform import resize
import imageio.v3 as iio
import torch
import random
import torch.nn as nn
from pathlib import Path
import numpy as np

# Pretrained Model Path

In [None]:
# Location of the saved checkpoint (state dict) for the pair-scoring model.
model_path = "/kaggle/input/hhjhjjh/pytorch/default/1/best_model (2).pth"

# Test Dataset Path

In [None]:
# Root folder of the evaluation images; each sub-folder is treated as one class.
test_dataset_path = "/kaggle/input/cosmys-face-datast/Comys_Hackathon5/Task_B/val"

# Model Definition

In [None]:
import torch
import torch.nn as nn
from torchvision.models import resnet50, ResNet50_Weights

class Mymodel(nn.Module):
    """Two-branch ResNet-50 network that scores a pair of images.

    Each input is passed through its own ResNet-50 whose final ``fc`` layer is
    replaced by ``nn.Identity``. The element-wise absolute difference of the
    two branch outputs is fed to a ``Linear(2048, 1)`` + ``Sigmoid`` head, so
    ``forward`` returns one value in (0, 1) per pair.

    Attribute names (``f1``, ``f2``, ``classification``) define the state-dict
    keys and are accessed directly by callers, so they must not be renamed.
    """

    @staticmethod
    def _backbone():
        """Build a ResNet-50 with default weights and its ``fc`` layer removed."""
        net = resnet50(weights=ResNet50_Weights.DEFAULT)
        net.fc = nn.Identity()
        return net

    def __init__(self):
        super().__init__()

        # Two independent backbones, one per input of the pair.
        self.f1 = self._backbone()
        self.f2 = self._backbone()

        # Scoring head applied to |features_1 - features_2|.
        self.classification = nn.Sequential(nn.Linear(2048, 1), nn.Sigmoid())

        # Branch outputs of the most recent forward pass (None before the first).
        self.fc1 = None
        self.fc2 = None

    def forward(self, x1, x2):
        """Score the pair ``(x1, x2)`` and cache both branch outputs."""
        self.fc1 = self.f1(x1)
        self.fc2 = self.f2(x2)
        return self.classification(torch.abs(self.fc1 - self.fc2))

    def feats(self):
        """Return the ``(branch_1, branch_2)`` outputs cached by the last ``forward``."""
        return self.fc1, self.fc2


# Load the pretrained model

In [None]:
# Choose the device once so the checkpoint also loads on CPU-only machines.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

model = Mymodel()
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
model.load_state_dict(torch.load(model_path, map_location=DEVICE))
model.to(DEVICE)
# This notebook only runs inference: eval mode makes layers such as BatchNorm
# use their stored statistics instead of per-batch statistics.
model.eval()

# Top 1% Accuracy

In [None]:
from torch.utils.data import Dataset
from pathlib import Path
import imageio.v3 as iio
import torch
import numpy as np
from skimage.transform import resize

class DataPicker(Dataset):
    """Dataset of single images labelled by the class folder they live in.

    Directory layout walked by ``__init__``::

        path/<class_name>/<image files>
        path/<class_name>/distortion/<image files>

    Every sub-directory of ``path`` becomes a class. Files directly inside it
    and files inside its optional ``distortion`` sub-folder share its label.

    Args:
        path: Root directory containing one folder per class.
        size: ``(height, width)`` every image is resized to.

    Attributes set by ``__init__``: ``path``, ``files`` (list of ``Path``),
    ``classes`` (class name -> integer index), ``inx`` (number of classes),
    ``size``.
    """

    def __init__(self, path, size=(224, 224)):
        self.path = Path(path)
        self.files = []
        self.classes = {}
        self.inx = 0
        self.size = size

        # Sorted traversal: class indices and file order no longer depend on
        # the order in which the filesystem happens to list entries.
        for cls in sorted(self.path.glob("*")):
            if not cls.is_dir():
                continue

            self.classes[str(cls.name)] = self.inx
            self.inx += 1

            self.files += self._list_files(cls)

            distortion_folder = cls / "distortion"
            if distortion_folder.exists():
                self.files += self._list_files(distortion_folder)

    @staticmethod
    def _list_files(folder):
        """Return the regular files directly inside ``folder``, sorted by path."""
        return sorted(f_path for f_path in folder.glob("*") if f_path.is_file())

    @staticmethod
    def _to_unit_range(img):
        """Map pixel values to float64 in [0, 1] based on the array dtype.

        Integer images are scaled by the full range of their dtype. The
        arithmetic is done in float64 because subtracting ``info.min`` in the
        native dtype wraps around for signed integer types.

        Non-integer images are returned as float64 without rescaling: dividing
        by the ``finfo`` range would collapse every pixel to ~0.
        NOTE(review): this assumes float images are already in [0, 1] - confirm.
        """
        if np.issubdtype(img.dtype, np.integer):
            info = np.iinfo(img.dtype)
            span = float(info.max) - float(info.min)
            return (img.astype(np.float64) - float(info.min)) / span
        return img.astype(np.float64)

    def __len__(self):
        """Number of image files discovered under ``path``."""
        return len(self.files)

    def __getitem__(self, index):
        """Load image ``index`` and return ``(tensor, class_index)``.

        The tensor is float32 with channels first, shape ``(3, *size)``.
        """
        img_path = self.files[index]

        img = self._to_unit_range(iio.imread(img_path))
        # The output shape is forced to 3 channels by the resize itself.
        img = resize(img, (*self.size, 3), anti_aliasing=True)

        # HWC -> CHW, as expected by the convolutional backbones.
        img = torch.from_numpy(img).float().permute(2, 0, 1)

        # Images inside ".../<class>/distortion/" belong to <class>.
        parent = img_path.parent
        class_dir = parent.parent if parent.name == "distortion" else parent

        return img, self.classes[class_dir.name]


In [None]:
# Index every image under the test directory together with its class label.
datapicker = DataPicker(path=test_dataset_path)

In [None]:
# Number of image files the dataset found (displayed as the cell output).
len(datapicker)

In [None]:
# One image per batch, in dataset order, so row i of the extracted features
# corresponds to datapicker.files[i].
test_loader = DataLoader(dataset=datapicker, batch_size=1, shuffle=False)

# Model Testing on Test Dataset

In [None]:
# Short aliases for the two backbones and the scoring head of the loaded model.
m1, m2, clf = model.f1, model.f2, model.classification

In [None]:
# Buffers filled by the extraction loop: one feature vector per image from
# each backbone, plus the matching class label.
model1_feat, model2_feat, y_true = [], [], []

In [None]:
# Inference only: eval mode makes layers such as BatchNorm use their stored
# statistics (and stops them being updated by these forward passes).
m1.eval()
m2.eval()

# Run on whatever device the backbone weights live on instead of assuming CUDA.
feat_device = next(m1.parameters()).device

# Start from empty buffers so re-running this cell does not append duplicates.
model1_feat.clear()
model2_feat.clear()
y_true.clear()

with torch.no_grad():  # no autograd graph is needed for feature extraction
    for img, idx in test_loader:
        img = img.to(feat_device)  # transfer once, reuse for both backbones
        # The loader yields batches of one image: keep the single row.
        model1_feat.append(m1(img)[0].tolist())
        model2_feat.append(m2(img)[0].tolist())
        # Store plain ints rather than 0-d tensors so the column is numeric.
        y_true.append(int(idx[0]))

df_model1 = pd.DataFrame(model1_feat)
df_model1['target'] = y_true
df_model2 = pd.DataFrame(model2_feat)
df_model2['target'] = y_true
y_target = [int(i) for i in y_true]

In [None]:
# Overwrite the label column of both feature tables with the plain-int labels.
df_model1['target'], df_model2['target'] = y_target, y_target

In [None]:
import torch
import numpy as np

def compute_cross_top1_percent_excluding_self(df_model1, df_model2, clf, top_fraction=0.01):
    """Top-k retrieval accuracy of ``clf`` across two feature tables.

    For every row ``i`` of ``df_model1`` the function scores
    ``clf(|x1_i - x2_j|)`` against every row ``j != i`` of ``df_model2``,
    keeps the ``k`` highest-scoring rows and counts a hit when at least one of
    them has the same ``target`` as row ``i``. Row ``i`` of both tables is
    treated as the same sample, which is why it is excluded.

    Args:
        df_model1: Query features, one row per sample, plus a ``target`` column.
        df_model2: Gallery features with the same row order and a ``target``
            column.
        clf: Module mapping a ``(rows, dim)`` float tensor to one score per
            row. It is put in eval mode and moved to CUDA when available,
            otherwise to CPU.
        top_fraction: Fraction of the ``n - 1`` candidates kept as the top-k
            set; ``k = max(1, int(top_fraction * (n - 1)))``. Defaults to 1%.

    Returns:
        Fraction of rows with a hit, as a float. ``0.0`` when there are fewer
        than two rows, because no candidate other than "self" exists.

    Raises:
        ValueError: If the two tables have a different number of rows.
    """
    clf.eval()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    clf.to(device)

    y1 = df_model1['target'].values
    y2 = df_model2['target'].values
    if len(y1) != len(y2):
        raise ValueError(
            f"df_model1 and df_model2 must have the same number of rows, "
            f"got {len(y1)} and {len(y2)}"
        )

    n = len(y1)
    if n < 2:
        return 0.0

    # Move both feature matrices to the device once instead of rebuilding and
    # re-uploading a copy of the gallery on every iteration.
    X1 = torch.tensor(df_model1.drop(columns='target').values,
                      dtype=torch.float32, device=device)
    X2 = torch.tensor(df_model2.drop(columns='target').values,
                      dtype=torch.float32, device=device)

    # Loop-invariant; capped so the excluded row can never be selected.
    top_k = min(max(1, int(top_fraction * (n - 1))), n - 1)

    correct = 0
    with torch.no_grad():
        for i in range(n):
            diff = torch.abs(X1[i].unsqueeze(0) - X2)
            scores = clf(diff).reshape(-1)

            # Exclude the sample itself by giving it the lowest possible score.
            scores[i] = float('-inf')

            top_indices = torch.topk(scores, top_k).indices.cpu().numpy()
            if np.any(y2[top_indices] == y1[i]):
                correct += 1

    return correct / n


In [None]:
# Retrieval accuracy of the scoring head over the extracted feature tables.
top1_acc = compute_cross_top1_percent_excluding_self(
    df_model1=df_model1,
    df_model2=df_model2,
    clf=clf,
)
print(f"Top 1% Accuracy : {top1_acc:.4f}")

# Macro F1 Score

In [None]:
# Pair-sampling sizes used when building the verification pairs:
#   sample_a_class - images drawn from the class itself for the first slot,
#   sample_o_class - images (positive pairs) or other classes (negative
#                    pairs) drawn for the second slot.
sample_a_class, sample_o_class = 5, 6

In [None]:
def all_pair(l1, l2):
    """Return every ordered pair ``(a, b)`` with ``a`` from ``l1`` and ``b`` from ``l2``.

    Pairs are listed with ``l1`` as the outer loop, i.e. all partners of the
    first element of ``l1`` come first.
    """
    pairs = []
    for left in l1:
        for right in l2:
            pairs.append((left, right))
    return pairs


def all_files(class_paths):
    """Pick one random image path from each class directory.

    Directories for which ``get_all_imgs`` finds no files are skipped, so the
    result can be shorter than ``class_paths``.
    """
    picked = []
    for class_dir in class_paths:
        candidates = get_all_imgs(class_dir)
        if not candidates:
            continue
        picked.append(random.choice(candidates))
    return picked


def get_all_imgs(cls_path):
    """List the image files of one class folder as strings.

    Returns the files directly inside ``cls_path`` followed by the files
    inside its ``distortion`` sub-folder, when that exists. Each group is
    sorted so the result does not depend on filesystem listing order, which
    would otherwise make seeded random sampling from it unrepeatable.

    Args:
        cls_path: Class directory, as a ``Path`` or a string.

    Returns:
        List of file paths as ``str``.
    """
    cls_path = Path(cls_path)  # also accept plain string paths
    imgs = sorted(str(p) for p in cls_path.glob("*") if p.is_file())
    distortion_dir = cls_path / "distortion"
    if distortion_dir.exists():
        imgs += sorted(str(p) for p in distortion_dir.glob("*") if p.is_file())
    return imgs

In [None]:
import random
from pathlib import Path

# Main pairing logic
# Fixed seed so the sampled pairs, and therefore the reported metrics, are
# repeatable between runs for an unchanged directory listing.
random.seed(42)

data = []
path = Path(test_dataset_path)

# List the class folders once, in sorted order, instead of re-reading the
# directory for every class.
class_dirs = sorted(c for c in path.iterdir() if c.is_dir())

for cls in class_dirs:
    # Get all images from current class including distortion
    class_imgs = get_all_imgs(cls)
    if len(class_imgs) < max(sample_a_class, sample_o_class):
        continue  # Skip classes with insufficient images

    # +ve pairs
    img1_t = random.sample(class_imgs, sample_a_class)
    img2_t = random.sample(class_imgs, sample_o_class)
    # Both samples come from the same pool, so drop pairs of an image with itself.
    total_p_pair = [(i1, i2) for i1, i2 in all_pair(img1_t, img2_t) if i1 != i2]

    # -ve pairs
    other_classes = [c for c in class_dirs if c.name != cls.name]
    img1_t = random.sample(class_imgs, sample_a_class)
    # Never request more classes than exist (random.sample would raise).
    n_other = min(sample_o_class, len(other_classes))
    img2_t = all_files(random.sample(other_classes, n_other))
    total_n_pair = all_pair(img1_t, img2_t)

    # Store pairs
    for i1, i2 in total_p_pair:
        data.append([i1, i2, 1])
    for i1, i2 in total_n_pair:
        data.append([i1, i2, 0])


In [None]:
# One row per sampled pair: both image paths and the same-class flag (1/0).
df_test = pd.DataFrame(data=data, columns=["img1", "img2", "target"])

In [None]:
from torch.utils.data import Dataset
from pathlib import Path
import imageio.v3 as iio
from skimage.transform import resize
import numpy as np
import torch

class PairPicker(Dataset):
    """Dataset of image pairs described by a DataFrame.

    The frame is read positionally: column 0 and column 1 hold the two image
    paths, column 2 holds the pair label. Rows are shuffled once when the
    dataset is built.

    Args:
        df: Frame with (path_1, path_2, label) as its first three columns.
        size: ``(height, width)`` every image is resized to.
        seed: Optional ``random_state`` for the shuffle. ``None`` (default)
            gives a different order on every construction.
    """

    def __init__(self, df, size=(224, 224), seed=None):
        self.df = df.sample(frac=1, random_state=seed).reset_index(drop=True)
        self.size = size

    def __len__(self):
        """Number of pairs."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(img1, img2, label)`` for pair ``index``.

        Both images are float32 tensors of shape ``(3, *size)``; the label is
        a float32 scalar tensor.
        """
        row = self.df.iloc[index]
        # Explicit positional access: plain ``row[0]`` on a string-labelled
        # Series relies on a deprecated integer-key fallback.
        img1_path = Path(row.iloc[0])
        img2_path = Path(row.iloc[1])
        label = torch.tensor(row.iloc[2], dtype=torch.float32)

        img1 = self._normalize_and_resize(iio.imread(img1_path))
        img2 = self._normalize_and_resize(iio.imread(img2_path))

        return img1, img2, label

    @staticmethod
    def _to_unit_range(img):
        """Map pixel values to float64 in [0, 1] based on the array dtype.

        Integer images are scaled by the full range of their dtype. The
        arithmetic is done in float64 because subtracting ``info.min`` in the
        native dtype wraps around for signed integer types.

        Non-integer images are returned as float64 without rescaling: dividing
        by the ``finfo`` range would collapse every pixel to ~0.
        NOTE(review): this assumes float images are already in [0, 1] - confirm.
        """
        if np.issubdtype(img.dtype, np.integer):
            info = np.iinfo(img.dtype)
            span = float(info.max) - float(info.min)
            return (img.astype(np.float64) - float(info.min)) / span
        return img.astype(np.float64)

    def _normalize_and_resize(self, img):
        """Scale ``img`` to [0, 1], resize to ``(*size, 3)`` and return it as CHW float32."""
        img = self._to_unit_range(img)

        # The output shape is forced to 3 channels by the resize itself.
        img = resize(img, (*self.size, 3), anti_aliasing=True)
        img = torch.from_numpy(img).float().permute(2, 0, 1)

        return img


In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import torch
import numpy as np

def evaluate_model(model, val_loader, device='cuda'):
    """Evaluate a pair-scoring model and print its classification metrics.

    The model is put in eval mode and moved to ``device``. Every batch from
    ``val_loader`` must be ``(img1, img2, label)``. The model output is
    thresholded at 0.5 to obtain the predicted label. Accuracy, the
    classification report and the confusion matrix are printed; nothing is
    returned.

    Batches of any size are supported. If the loader yields no samples a
    message is printed and the metrics are skipped.

    Args:
        model: Callable as ``model(img1, img2)``, returning one score per pair.
        val_loader: Iterable of ``(img1, img2, label)`` tensor batches.
        device: Device the model and the images are moved to.
    """
    model.eval()
    model.to(device)

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for img1, img2, label in val_loader:
            img1, img2 = img1.to(device), img2.to(device)

            # Flatten to one score per pair so batches of any size work.
            output = model(img1, img2).reshape(-1)
            pred = (output > 0.5).float()

            all_preds.extend(pred.cpu().tolist())
            all_labels.extend(label.float().reshape(-1).tolist())

    if not all_labels:
        print("\n No samples to evaluate.")
        return

    all_preds = np.asarray(all_preds)
    all_labels = np.asarray(all_labels)

    acc = accuracy_score(all_labels, all_preds)
    print(f"\n Accuracy: {acc * 100:.2f}%")

    print("\n Classification Report:")
    print(classification_report(all_labels, all_preds, digits=4))

    cm = confusion_matrix(all_labels, all_preds)
    print("\n Confusion Matrix:")
    print(cm)


In [None]:
# Wrap the sampled pairs in a dataset and serve them one pair per batch.
pairpicker_test = PairPicker(df=df_test)
test_loader = DataLoader(dataset=pairpicker_test, batch_size=1, shuffle=False)

In [None]:
# Print accuracy, per-class precision/recall/F1 and the confusion matrix.
evaluate_model(
    model=model,
    val_loader=test_loader,
    device='cuda' if torch.cuda.is_available() else 'cpu',
)