In [1]:
import os
import pickle

from pathlib import Path

import pandas as pd
import numpy as np
import torch
import torch.nn as nn

from PIL import Image
from sklearn.metrics import f1_score
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms

In [2]:
ROOT_DIR = '../'
MODEL_PATH = ROOT_DIR + 'models/Patryk-ResNeXt-more-layers-model.pkt'
VALIDATION_LABELS_PATH = ROOT_DIR + 'data/validation_labels.csv'
VALIDATION_DATA_PATH = ROOT_DIR + 'data/validation_images'
OPTIMAL_THRESHOLD_PATH = ROOT_DIR + 'models/optimal_thresholds.npy'

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(DEVICE)

cpu


  return torch._C._cuda_getDeviceCount() > 0


In [3]:
def OFO(y_pred, y_true):
    length = y_pred.shape[1]
    a = np.ones(length)
    b = 2 * np.ones(length)
    tau = a / b
    
    for i in range(y_pred.shape[0]):
        row_true = y_true[i]
        row_pred = y_pred[i]

        y_pred_threshold = (row_pred > tau).astype(int)
        a += np.logical_and(y_pred_threshold, row_true).astype(int)
        b += y_pred_threshold + row_true
        tau = a / b
    return tau


def skyhacks_f1_score(preds, y):
    return f1_score(y, preds, average = 'macro')

In [4]:
class MultiClassDataset(Dataset):

    def __init__(self , csv_file , img_dir , transform=None):
        self.df = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform
    
    def __getitem__(self, idx):
        d = self.df.iloc[idx]
        image = Image.open(f'{self.img_dir}/{d.Name}').convert("RGB")
        label = torch.tensor(d[1:].tolist() , dtype=torch.float32)
    
        if self.transform is not None:
            image = self.transform(image)
        return image, label
  
    def __len__(self):
        return len(self.df)

In [5]:
model = torch.load(MODEL_PATH, map_location=DEVICE)

In [6]:
batch_size = 16
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

validation_set = MultiClassDataset(VALIDATION_LABELS_PATH, VALIDATION_DATA_PATH, transform)
validation_loader = DataLoader(validation_set, shuffle=False, batch_size=batch_size)

In [7]:
results = []

for batch in validation_loader:
    x, y = batch
    res = model(x.to(DEVICE))
    res = torch.sigmoid(res).to(torch.float32).cpu().detach().numpy()
    # res = (torch.sigmoid(res) > 0.5).to(torch.float32).cpu().numpy()
    results.append(res)

In [8]:
df_val_true = pd.read_csv(VALIDATION_LABELS_PATH)
df_val_pred = pd.DataFrame(np.vstack(results), columns = df_val_true.columns[1:])
df_val_pred['Name'] = df_val_true['Name'].copy()
df_val_pred = df_val_pred[df_val_true.columns]

In [9]:
y_pred = df_val_pred.iloc[:, 1:].values
y_true = df_val_true.iloc[:, 1:].values

In [10]:
optimal_thresholds = OFO(y_pred, y_true)

In [11]:
optimal_thresholds

array([0.26666667, 0.45454545, 0.28571429, 0.43333333, 0.42105263,
       0.5       , 0.45762712, 0.38709677, 0.39622642, 0.30434783,
       0.33333333, 0.29861111, 0.35443038, 0.3968254 , 0.38541667,
       0.39130435, 0.38888889, 0.34848485, 0.36842105, 0.38461538,
       0.27160494, 0.34615385, 0.33333333, 0.09090909, 0.15789474,
       0.45971564, 0.34944238, 0.39393939, 0.36363636, 0.35658915,
       0.41463415, 0.5       , 0.41176471, 0.33333333, 0.13114754,
       0.44827586, 0.5       , 0.37944664])

In [12]:
with open(OPTIMAL_THRESHOLD_PATH, 'wb') as f:
    np.save(f, optimal_thresholds)

In [13]:
skyhacks_f1_score((y_pred > 0.5).astype(int), y_true)

0.6630268299592036

In [14]:
skyhacks_f1_score((y_pred > optimal_thresholds).astype(int), y_true)

0.7232951340904469