In [None]:
# imports
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as f
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from torch.nn.parameter import Parameter
import cv2
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import warnings
from scipy.optimize import minimize
from sklearn.metrics import classification_report, confusion_matrix, cohen_kappa_score
import seaborn as sns
import matplotlib.pyplot as plt

# silence library warnings so the cell outputs stay readable
warnings.filterwarnings('ignore')

# seed numpy and torch so data shuffling and random flips start from the same
# state on every restart-and-run-all
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# run on the gpu when one is visible, otherwise fall back to the cpu
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [None]:
# data loading and splitting
# locate the aptos 2019 training csv and image folder
path_2019_dir = '/kaggle/input/aptos2019-blindness-detection/'
train_csv_path = os.path.join(path_2019_dir, 'train.csv')
train_images_dir = os.path.join(path_2019_dir, 'train_images')

# add the image path and a `target` copy of the diagnosis column to every row
df_2019 = pd.read_csv(train_csv_path).assign(
    path=lambda frame: frame['id_code'].map(
        lambda code: os.path.join(train_images_dir, f'{code}.png')
    ),
    target=lambda frame: frame['diagnosis'],
)

# stratified 85/15 split with a fixed random state
train_df, val_df = train_test_split(
    df_2019,
    test_size=0.15,
    stratify=df_2019['target'],
    random_state=42,
)
print(f"data ready training on {len(train_df)} images")

data ready training on 3112 images


In [None]:
# preprocessing
img_size = 300

def ben_graham_processing(image, size=None):
    """Crop a fundus photo to the eye, resize it and boost local contrast.

    Steps: crop to the bounding box of the largest external contour of pixels
    brighter than 10 in grayscale, resize to a square, compute
    ``4 * image - 4 * gaussian_blur(image, sigma=10) + 128``, then zero every
    pixel outside the circle centred in the frame with radius ``size // 2``.

    Args:
        image: image array as returned by ``cv2.imread``. The grayscale
            conversion uses ``COLOR_BGR2GRAY``, so BGR channel order is expected.
        size: side length in pixels of the square output. Defaults to the
            module-level ``img_size`` when omitted.

    Returns:
        The processed image, ``size`` x ``size`` pixels, in the same channel
        order as the input.

    Raises:
        ValueError: if ``image`` is ``None`` or has no pixels (for example when
            ``cv2.imread`` could not read the file).
    """
    if image is None or getattr(image, 'size', 0) == 0:
        raise ValueError("ben_graham_processing received an empty image")
    side = img_size if size is None else int(size)

    # find the eye: everything brighter than the near-black background
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
        image = image[y:y+h, x:x+w]

    image = cv2.resize(image, (side, side))
    # subtract the blurred copy to remove slow illumination changes, re-centre on 128
    image = cv2.addWeighted(image, 4, cv2.GaussianBlur(image, (0, 0), 10), -4, 128)

    # keep only the inscribed circle; any non-zero mask value selects the pixel
    mask = np.zeros((side, side), np.uint8)
    cv2.circle(mask, (side // 2, side // 2), side // 2, 1, thickness=-1)
    return cv2.bitwise_and(image, image, mask=mask)

class drdataset(Dataset):
    """Dataset of preprocessed fundus images paired with a float32 target.

    Args:
        dataframe: frame with a ``path`` column (image file location) and a
            ``target`` column (numeric grade).
        transform: optional callable applied to the preprocessed RGB image.
    """

    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        row = self.df.iloc[idx]
        img = cv2.imread(row['path'])
        if img is None:
            # cv2.imread reports a missing or unreadable file by returning None
            raise FileNotFoundError(f"could not read image: {row['path']}")
        # preprocess while the image is still in opencv's bgr order, because the
        # crop mask is built with COLOR_BGR2GRAY; convert to rgb afterwards
        img = ben_graham_processing(img)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = torch.tensor(row['target'], dtype=torch.float32)
        if self.transform:
            img = self.transform(img)
        return img, label

# channel-wise normalisation constants shared by both pipelines
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# training pipeline: random horizontal/vertical flips, then tensor conversion and normalisation
train_aug = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
    ]
    + [transforms.ToTensor(), transforms.Normalize(_norm_mean, _norm_std)]
)

# validation pipeline: no augmentation, only tensor conversion and normalisation
val_aug = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize(_norm_mean, _norm_std),
    ]
)

# both loaders use batches of 16 loaded in the main process; only training is shuffled
_loader_kwargs = dict(batch_size=16, num_workers=0)
train_loader = DataLoader(drdataset(train_df, train_aug), shuffle=True, **_loader_kwargs)
val_loader = DataLoader(drdataset(val_df, val_aug), shuffle=False, **_loader_kwargs)

In [None]:
# stable gem pooling and model
class gem(nn.Module):
    """Generalized-mean pooling over the last two dimensions with a fixed exponent.

    Computes ``mean(clamp(x, min=eps) ** p) ** (1 / p)`` across the full
    spatial extent, leaving the two pooled dimensions with size 1.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        # p is a plain number rather than a learnable Parameter
        # (chosen by the author to avoid in-place autograd errors)
        self.p = p
        self.eps = eps

    def forward(self, x):
        height, width = x.shape[-2:]
        # clamp first so the fractional root below never sees values under eps
        powered = torch.pow(torch.clamp(x, min=self.eps), self.p)
        pooled = f.avg_pool2d(powered, kernel_size=(height, width))
        return torch.pow(pooled, 1.0 / self.p)

# build a fresh efficientnet-b3 with default pretrained weights and swap its
# average pool for gem pooling with a fixed exponent of 3
model = models.efficientnet_b3(weights='DEFAULT')
model.avgpool = gem(p=3)

# replace the final linear layer with a single-output regression head
in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features=in_features, out_features=1)
model = model.to(device)

# huber loss on the single output, adamw over all model parameters
criterion = nn.HuberLoss()
optimizer = optim.AdamW(params=model.parameters(), lr=1e-4)

In [None]:
# training (limited epochs to prevent overfitting)
epochs = 10
for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    n_seen = 0
    for images, labels in tqdm(train_loader, desc=f"epoch {epoch + 1}/{epochs}"):
        # reshape targets to (batch, 1) so they line up with the single-output head
        images, labels = images.to(device), labels.to(device).view(-1, 1)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # the criterion returns a batch mean, so weight it by the batch size
        # to get a true per-sample average for the epoch
        train_loss += loss.item() * images.size(0)
        n_seen += images.size(0)
    train_loss /= max(n_seen, 1)
    print(f"epoch {epoch + 1}/{epochs} | train loss: {train_loss:.4f}")

In [None]:
# save model after all epochs
import torch
# note: despite the file name these are the weights after the last epoch,
# no validation-based selection happens before saving
save_path = os.path.abspath('best_model.pth')
try:
    torch.save(model.state_dict(), save_path)
    # report the path actually written rather than an assumed working directory
    print(f"saved successfully to {save_path}")
except Exception as e:
    # best-effort save: report the problem but let the rest of the notebook run
    print(f"error saving: {e}")

saved successfully to /kaggle/working/best_model.pth


In [None]:
#stability check
def get_stable_prediction(model, img_tensor, threshold=0.85):
    """Measure how consistent a single-image prediction is under flips and rotation.

    The model is evaluated on the image, its flip along dim 3, its flip along
    dim 2 and a 90-degree rotation in the (2, 3) plane. Stability is defined as
    ``1 - std`` of those four scalar outputs.

    Args:
        model: module returning one scalar per image; it is switched to eval
            mode and left that way.
        img_tensor: 4-D tensor holding exactly one image (batch size 1).
        threshold: stability above this value is reported as ``"stable"``.

    Returns:
        ``(mean_prediction, stability, verdict)`` where verdict is
        ``"stable"`` or ``"unstable"``.

    Raises:
        ValueError: if ``img_tensor`` is not a 4-D batch of one image.
    """
    if img_tensor.dim() != 4 or img_tensor.size(0) != 1:
        raise ValueError(
            f"expected a single image shaped (1, C, H, W), got {tuple(img_tensor.shape)}"
        )
    model.eval()
    # send inputs to wherever the model's weights live; a model without
    # parameters is evaluated on the input's own device
    first_param = next(model.parameters(), None)
    target_device = img_tensor.device if first_param is None else first_param.device
    versions = [
        img_tensor,
        torch.flip(img_tensor, [3]),
        torch.flip(img_tensor, [2]),
        torch.rot90(img_tensor, 1, [2, 3]),
    ]
    results = []
    with torch.no_grad():
        for v in versions:
            results.append(model(v.to(target_device)).item())
    mean_val = np.mean(results)
    stability = 1.0 - np.std(results)
    verdict = "stable" if stability > threshold else "unstable"
    return mean_val, stability, verdict

# take the first validation batch and check its first image, kept as a batch of one
img_tensor, _ = next(iter(val_loader))
val, stab, msg = get_stable_prediction(model, img_tensor[:1])
print(f"prediction: {val:.2f} | stability: {stab:.4f} | status: {msg}")

In [None]:
# accuracy & kappa optimizer
from scipy.optimize import minimize
from sklearn.metrics import accuracy_score, cohen_kappa_score

def optimize_metrics(coeffs, raw_preds, targets):
    """Objective for the threshold search.

    Bins ``raw_preds`` with ``coeffs`` as thresholds and returns the negated
    equal-weight blend of accuracy and quadratic-weighted kappa against
    ``targets``. Thresholds that are not strictly increasing return a fixed
    penalty of 10.0 instead.
    """
    # thresholds must stay in strictly increasing order before they are used
    # as np.digitize bin edges; anything else gets a high penalty
    strictly_increasing = np.all(np.diff(coeffs) > 0)
    if not strictly_increasing:
        return 10.0

    binned = np.digitize(raw_preds, coeffs)

    # equal weighting of plain accuracy and quadratic-weighted kappa
    accuracy = accuracy_score(targets, binned)
    kappa = cohen_kappa_score(targets, binned, weights='quadratic')

    # the minimizer lowers its objective, so return the negated blend
    blended = 0.5 * accuracy + 0.5 * kappa
    return -blended

# collect raw regression outputs and ground-truth grades for the validation set
# in this cell, so the search does not depend on variables left over from
# other (or deleted) cells
model.eval()
pred_batches, label_batches = [], []
with torch.no_grad():
    for val_images, val_targets in tqdm(val_loader, desc='validation inference'):
        pred_batches.append(model(val_images.to(device)).cpu().numpy().ravel())
        label_batches.append(val_targets.numpy())
raw_preds = np.concatenate(pred_batches)
labels = np.concatenate(label_batches).astype(int)

# display names for grades 0-4, in label order
target_names = ['0-Normal', '1-Mild', '2-Moderate', '3-Severe', '4-Proliferative']

# thresholds (inspired by the winning solution)
initial_thresholds = [0.7, 1.5, 2.5, 3.5]

# search for best thresholds for our specific validation set
result = minimize(optimize_metrics, initial_thresholds,
                  args=(raw_preds, labels),
                  method='nelder-mead',
                  options={'xatol': 1e-4})

best_thresholds = np.sort(result.x)
final_optimized_preds = np.digitize(raw_preds, best_thresholds)

print(f"optimized thresholds: {best_thresholds}")
print(f"--- final results ---")
print(f"new accuracy: {accuracy_score(labels, final_optimized_preds):.4f}")
print(f"new kappa: {cohen_kappa_score(labels, final_optimized_preds, weights='quadratic'):.4f}")
# pass the label list explicitly so the report still lines up with target_names
# when a grade never appears in the targets or the predictions
print(classification_report(labels, final_optimized_preds,
                            labels=list(range(len(target_names))),
                            target_names=target_names))

optimized thresholds: [0.76508932 1.47002369 2.67058553 2.94852521]
--- final results ---
new accuracy: 0.8055
new kappa: 0.8865
                 precision    recall  f1-score   support

       0-Normal       0.93      0.99      0.96       271
         1-Mild       0.54      0.46      0.50        56
     2-Moderate       0.73      0.81      0.77       150
       3-Severe       0.39      0.24      0.30        29
4-Proliferative       0.68      0.48      0.56        44

       accuracy                           0.81       550
      macro avg       0.65      0.60      0.62       550
   weighted avg       0.79      0.81      0.79       550



In [None]:
# experimenting with tta
def get_preds_with_tta(model, loader):
    """Predict over ``loader`` with three-view test-time augmentation.

    Each batch is evaluated as-is, flipped along dim 3 and flipped along dim 2
    (horizontal and vertical flips, assuming (N, C, H, W) batches); the three
    outputs are averaged. The model is put in eval mode and gradients are
    disabled.

    Returns:
        ``(predictions, labels)``: the flattened averaged outputs and the
        labels yielded by the loader, both as numpy arrays.
    """
    model.eval()
    pred_rows, label_rows = [], []

    with torch.no_grad():
        for images, labels in tqdm(loader):
            images = images.to(device)

            # original, horizontal flip, vertical flip
            views = (images, torch.flip(images, dims=[3]), torch.flip(images, dims=[2]))
            out_plain, out_hflip, out_vflip = [model(view) for view in views]

            averaged = (out_plain + out_hflip + out_vflip) / 3.0

            pred_rows.extend(averaged.cpu().numpy())
            label_rows.extend(labels.numpy())

    return np.array(pred_rows).flatten(), np.array(label_rows)

# get the new "stabilized" predictions: validation outputs averaged over the
# original, horizontally flipped and vertically flipped images
# note: this rebinds the global `labels` to the validation targets from this pass
raw_preds_tta, labels = get_preds_with_tta(model, val_loader)

# need to run the optimizer again with tta preds