<h3>Import libraries</h3>

In [None]:
# Standard Library
import math

# Torch-related
import torch
import torchvision
import torchmetrics
import timm

# General third party
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from PIL import Image

<h3>Config</h3>

In [None]:
class CFG:
    """Notebook-wide settings, read (and partly filled in) by the cells below."""

    # ---- General ----
    seed = 42

    # ---- Data ----
    base_path = "../data"
    image_size = [224, 224]
    # Main regression targets.
    class_names = [
        'X4_mean',
        'X11_mean',
        'X18_mean',
        'X26_mean',
        'X50_mean',
        'X3112_mean',
    ]
    num_classes = len(class_names)
    # Auxiliary targets, one per main target.
    aux_class_names = [
        'X4_sd',
        'X11_sd',
        'X18_sd',
        'X26_sd',
        'X50_sd',
        'X3112_sd',
    ]
    num_aux_classes = len(aux_class_names)
    # Placeholders: populated after the test CSV has been read.
    feature_names = None
    num_features = 0

    # ---- K-Fold ----
    num_folds = 5
    fold = 0

    # ---- Model ----
    epochs = 1  # 12
    batch_size = 32  # 96
    lr_mode = "step"
    profile = False
    inet_output_size = 1408

    # ---- Loss ----
    # Placeholders: populated right before the loss classes are defined.
    y_median = None
    eps = None


# Prefer the GPU whenever torch can see one.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

<h3>Load and process data set</h3>

In [None]:
# Train + Valid: read the table, derive each image path from the id column,
# and replace missing auxiliary targets with the sentinel -1.
train_image_dir = f'{CFG.base_path}/train_images/'
df = pd.read_csv(f'{CFG.base_path}/train.csv')
df['image_path'] = train_image_dir + df['id'].astype(str) + '.jpeg'
aux_filled = df.loc[:, CFG.aux_class_names].fillna(-1)
df.loc[:, CFG.aux_class_names] = aux_filled
display(df.head(2))

# Test: same path derivation, no targets to fill.
test_image_dir = f'{CFG.base_path}/test_images/'
test_df = pd.read_csv(f'{CFG.base_path}/test.csv')
test_df['image_path'] = test_image_dir + test_df['id'].astype(str) + '.jpeg'
display(test_df.head(2))

# Tabular features = every test column except the first one and the last one
# (the last one is the image_path column appended just above).
CFG.feature_names = test_df.columns[1:-1].tolist()
CFG.num_features = len(CFG.feature_names)

In [None]:
def build_augmenter():
    """Create the training-time image augmentation callable.

    Returns:
        A function ``augment(inp, label=None)`` that replaces ``inp["images"]``
        with its augmented version and returns ``(inp, label)``; ``label`` is
        passed through untouched.
    """
    T = torchvision.transforms
    crop_size = (CFG.image_size[0], CFG.image_size[1])

    # Individual augmentation steps, in application order.
    steps = torch.nn.ModuleList([
        T.ColorJitter(brightness=0.1),
        T.ColorJitter(contrast=0.1),
        T.ColorJitter(saturation=(0.45, 0.55)),
        T.ColorJitter(hue=0.1),
        T.RandomErasing(scale=(0.06, 0.15)),
        T.RandomVerticalFlip(),
        T.RandomHorizontalFlip(),
        T.RandomResizedCrop(size=crop_size, scale=(0.05, 0.15)),
        # Original author's note: value approx. multiplied by 360/2pi because
        # the Keras setting it was ported from is in radians, this one in degrees.
        T.RandomRotation(degrees=(0.6, 3.0)),
    ])

    # RandomApply treats the list as one unit: with probability 0.5 the whole
    # chain runs, otherwise the image is returned unchanged.
    random_chain = T.RandomApply(steps, p=0.5)

    def augment(inp, label=None):
        augmented = random_chain(inp["images"])
        inp["images"] = augmented
        return inp, label

    return augment


def build_decoder(with_labels=True):
    """Create the function that turns a raw sample into tensors.

    Args:
        with_labels: when True the returned function has the signature
            ``decode(inp, labels)`` and returns ``(inp, (label, aux_label))``;
            when False it has the signature ``decode(inp)`` and returns ``inp``.

    Returns:
        The decoding callable described above. In both cases ``inp["images"]``
        (a file path on entry) is replaced in place by a float image tensor
        resized to ``CFG.image_size``.
    """
    # Resize + ToTensor; ToTensor already rescales 8-bit pixel values to [0, 1].
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(CFG.image_size),
        torchvision.transforms.ToTensor(),
    ])

    def decode_image(inp):
        # Read the image; the context manager closes the file handle promptly.
        path = inp["images"]
        with Image.open(path) as image:
            # Force 3 channels so grayscale / CMYK files yield the same shape.
            image = transform(image.convert("RGB"))

        # No further division by 255: ToTensor has already produced values in
        # [0, 1], dividing again would squeeze the image into [0, 1/255].
        inp["images"] = image.float()
        return inp

    def decode_label(label, num_classes):
        # Float tensor with exactly `num_classes` entries.
        label = torch.tensor(label).float()
        label = label.view(num_classes)
        return label

    def decode_with_labels(inp, labels):
        # labels is the pair (main targets, auxiliary targets).
        inp = decode_image(inp)
        label = decode_label(labels[0], CFG.num_classes)
        aux_label = decode_label(labels[1], CFG.num_aux_classes)
        return (inp, (label, aux_label))

    return decode_with_labels if with_labels else decode_image

class INetDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing an image path with its tabular feature row.

    Args:
        paths: indexable collection of image paths.
        features: indexable collection of feature rows, aligned with ``paths``.
        labels: optional main targets, aligned with ``paths``.
        aux_labels: optional auxiliary targets; read whenever ``labels`` is given.
        decode_fn: callable applied first. Called as ``decode_fn(inp, targets)``
            and expected to return ``(inp, targets)`` when labels are present,
            otherwise called as ``decode_fn(inp)`` and expected to return ``inp``.
            ``None`` skips decoding.
        augment_fn: optional callable applied after decoding; called as
            ``augment_fn(inp, targets)`` / ``augment_fn(inp)`` and expected to
            return ``(inp, targets)``.

    Items are ``(inp, (label, aux_label))`` when labels are present and ``inp``
    otherwise, where ``inp`` is a dict with keys ``"images"`` and ``"features"``.
    """

    def __init__(self, paths, features, labels=None, aux_labels=None, decode_fn=None, augment_fn=None):
        self.paths = paths
        self.features = features
        self.labels = labels
        self.aux_labels = aux_labels
        self.decode_fn = decode_fn
        self.augment_fn = augment_fn

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        inp = {"images": self.paths[idx], "features": self.features[idx]}

        if self.labels is not None:
            targets = (self.labels[idx], self.aux_labels[idx])
            if self.decode_fn is not None:
                inp, targets = self.decode_fn(inp, targets)
            if self.augment_fn is not None:
                # Feed the *decoded* targets through, so augmentation does not
                # throw away the label tensors produced by decode_fn.
                inp, targets = self.augment_fn(inp, targets)
            return inp, targets

        if self.decode_fn is not None:
            inp = self.decode_fn(inp)
        if self.augment_fn is not None:
            # augment_fn returns (inp, label); unlabeled items are just `inp`.
            inp = self.augment_fn(inp)[0]
        return inp

def build_dataset(
    paths,
    features,
    labels=None,
    aux_labels=None,
    batch_size=32,
    augment=False,
    shuffle=False,
):
    """Wrap the given samples in an ``INetDataset`` and return a DataLoader over it.

    Args:
        paths: image paths.
        features: feature rows aligned with ``paths``.
        labels: optional main targets.
        aux_labels: optional auxiliary targets.
        batch_size: DataLoader batch size.
        augment: attach the augmenter from ``build_augmenter`` when True.
        shuffle: forwarded to the DataLoader.

    Returns:
        A ``torch.utils.data.DataLoader`` (despite the function's name).
    """
    # A label-aware decoder is used as soon as either kind of target is given.
    has_targets = not (labels is None and aux_labels is None)
    decoder = build_decoder(has_targets)
    augmenter = None
    if augment:
        augmenter = build_augmenter()

    samples = INetDataset(paths, features, labels, aux_labels, decoder, augmenter)
    return torch.utils.data.DataLoader(samples, batch_size=batch_size, shuffle=shuffle)

In [None]:
skf = StratifiedKFold(n_splits=CFG.num_folds, shuffle=True, random_state=CFG.seed)

# One bin column per trait: edges are percentiles of that trait's own
# distribution, taken at num_folds + 1 evenly spaced percentile levels.
percentile_levels = np.linspace(0, 100, CFG.num_folds + 1)
for i, trait in enumerate(CFG.class_names):
    trait_values = df[trait]
    bin_edges = np.percentile(trait_values, percentile_levels)
    df[f"bin_{i}"] = np.digitize(trait_values, bin_edges)

# Stratification key: the per-trait bin numbers glued together as one string.
bin_columns = [f"bin_{i}" for i in range(len(CFG.class_names))]
bins_as_text = df[bin_columns].astype(str)
df["final_bin"] = bins_as_text.agg("".join, axis=1)

# Stratified split on that key; every row gets the fold in which it is validation.
df = df.reset_index(drop=True)
fold_splits = skf.split(df, df["final_bin"])
for fold, (train_idx, valid_idx) in enumerate(fold_splits):
    df.loc[valid_idx, "fold"] = fold

In [None]:
# Work on a copy of the full table and split it on the configured fold.
sample_df = df.copy()
in_valid_fold = sample_df.fold == CFG.fold
train_df = sample_df[~in_valid_fold]
valid_df = sample_df[in_valid_fold]
print(f"# Num Train: {len(train_df)} | Num Valid: {len(valid_df)}")

# Standardize the tabular features; statistics come from the training rows only.
scaler = StandardScaler()
train_features = scaler.fit_transform(train_df[CFG.feature_names].values)
valid_features = scaler.transform(valid_df[CFG.feature_names].values)

# Training loader: shuffled and augmented.
train_paths = train_df.image_path.values
train_labels = train_df[CFG.class_names].values
train_aux_labels = train_df[CFG.aux_class_names].values
train_ds = build_dataset(
    train_paths,
    train_features,
    train_labels,
    train_aux_labels,
    batch_size=CFG.batch_size,
    augment=True,
    shuffle=True,
)

# Validation loader: fixed order, no augmentation.
valid_paths = valid_df.image_path.values
valid_labels = valid_df[CFG.class_names].values
valid_aux_labels = valid_df[CFG.aux_class_names].values
valid_ds = build_dataset(
    valid_paths,
    valid_features,
    valid_labels,
    valid_aux_labels,
    batch_size=CFG.batch_size,
    augment=False,
    shuffle=False,
)


In [None]:
# Get a sample of the data (one batch from the training loader)
inps, tars = next(iter(train_ds))
imgs = inps["images"]

# Plot the first eight images of the sample with their main-target labels
num_imgs, num_cols = 8, 4
num_rows = num_imgs // num_cols
fig, axes = plt.subplots(
    num_rows, num_cols, figsize=(4 * num_cols, num_rows * 5), squeeze=False
)

# Hide every axis up front so unused panels stay blank when the batch is short.
for ax in axes.flat:
    ax.axis("off")

for ax, img, tar in zip(axes.flat, imgs[:num_imgs], tars[0][:num_imgs]):
    img = img.permute(1, 2, 0).numpy()  # (C, H, W) -> (H, W, C) for imshow
    tar = tar.numpy()

    # Min-max rescale to [0, 1) for display. The range (max - min) is the
    # denominator; the small constant guards against a constant image.
    lo, hi = img.min(), img.max()
    img = (img - lo) / (hi - lo + 1e-4)

    # Three "name: value" pairs per title line.
    formatted_tar = "\n".join(
        [
            ", ".join(
                f"{name.replace('_mean','')}: {val:.2f}"
                for name, val in zip(CFG.class_names[j : j + 3], tar[j : j + 3])
            )
            for j in range(0, len(CFG.class_names), 3)
        ]
    )

    ax.imshow(img)
    ax.set_title(f"[{formatted_tar}]")

fig.tight_layout()
plt.show()

<h3>Define loss</h3>

In [None]:
# Per-target median of the training split; used as the baseline prediction in
# the R2 denominators below. Stored as float32 explicitly: the pandas values are
# float64 and would otherwise promote every loss/metric computation to double.
CFG.y_median = torch.tensor(
    train_df[CFG.class_names].median(axis=0).values, dtype=torch.float32
).to(device)
# Lower bound for the denominators, to avoid division by zero.
CFG.eps = torch.tensor([1e-6]).to(device)

class R2Loss(torch.nn.Module):
    """Ratio of squared error to squared deviation from ``CFG.y_median``.

    ``forward(y_true, y_pred)`` sums both quantities over dim 0, divides them
    per column (denominator floored at ``CFG.eps``) and returns the mean of the
    resulting ratios. Note the argument order: the first argument is the one
    compared against the median.
    """

    def forward(self, y_true, y_pred):
        residual_sq = torch.square(y_true - y_pred)
        deviation_sq = torch.square(y_true - CFG.y_median)

        numerator = residual_sq.sum(dim=0)
        denominator = torch.maximum(deviation_sq.sum(dim=0), CFG.eps)

        return (numerator / denominator).mean()

class R2Metric(torchmetrics.Metric):
    """Mean per-target R2 score, with ``CFG.y_median`` as the baseline prediction.

    The metric accumulates, per target column, the squared error (SSE) and the
    squared deviation from the median (SST) over all ``update`` calls, and
    reports ``mean(1 - SSE / SST)``. Higher is better; 1.0 is a perfect fit.
    """

    def __init__(self):
        super().__init__()
        # Scalar zeros broadcast to one entry per target on the first update.
        self.add_state("ss_res", default=torch.tensor(0.), dist_reduce_fx="sum")
        self.add_state("ss_total", default=torch.tensor(0.), dist_reduce_fx="sum")

    def update(self, y_true, y_pred):
        """Accumulate SSE and SST for one batch.

        Args:
            y_true: ground-truth targets; reduced over dim 0.
            y_pred: predictions, same shape as ``y_true``.
        """
        ss_res = torch.sum((y_true - y_pred) ** 2, dim=0)
        ss_total = torch.sum((y_true - CFG.y_median) ** 2, dim=0)
        # Accumulate the sums themselves (not per-batch ratios), so the result
        # does not depend on how the data was split into batches.
        self.ss_res = self.ss_res + ss_res
        self.ss_total = self.ss_total + ss_total

    def compute(self):
        """Return the R2 score averaged over targets."""
        ratio = self.ss_res / torch.maximum(self.ss_total, CFG.eps)
        return torch.mean(1 - ratio)

<h3>Construct model</h3>

In [None]:
def search_timm_model(query):
    """Print the pretrained timm model names that contain ``query``.

    Each match is printed on its own line as ``NN | name``, numbered from 00.
    """
    candidates = timm.list_models(pretrained=True)
    matches = filter(lambda model_name: query in model_name, candidates)
    for position, model_name in enumerate(matches):
        print(f'{position:02d} | {model_name}')
        
# search_timm_model('efficientnet')

In [None]:

class INet(torch.nn.Module):
    """Two-branch network: an image backbone head and a tabular-feature head.

    ``forward(images, features)`` returns a dict with
    ``"head"`` (backbone -> dropout -> linear, ``CFG.num_classes`` outputs) and
    ``"aux_head"`` (MLP on the features, ``CFG.num_aux_classes`` outputs).
    The two branches do not share any layers.
    """

    def __init__(self):
        super().__init__()

        # Image branch: pretrained backbone created with num_classes=0, followed
        # by dropout and a linear layer that expects CFG.inet_output_size inputs.
        self.backbone = timm.create_model(
            'efficientnet_b2', pretrained=True, num_classes=0
        )
        self.dropout1 = torch.nn.Dropout(0.2)
        self.fc1 = torch.nn.Linear(CFG.inet_output_size, CFG.num_classes)

        # Tabular branch: small SELU MLP.
        mlp_layers = [
            torch.nn.Linear(CFG.num_features, 326),
            torch.nn.SELU(),
            torch.nn.Linear(326, 64),
            torch.nn.SELU(),
            torch.nn.Dropout(0.1),
            torch.nn.Linear(64, CFG.num_aux_classes),
        ]
        self.dense = torch.nn.Sequential(*mlp_layers)

    def forward(self, images, features):
        image_out = self.fc1(self.dropout1(self.backbone(images)))

        # Features are cast to float32 before entering the MLP.
        tabular_out = self.dense(features.to(torch.float32))

        return {"head": image_out, "aux_head": tabular_out}

# Instantiate the network and move its parameters to the selected device.
# As the cell's last expression, the `.to(device)` call also displays the module.
model = INet()
model.to(device)

<h3>LR Scheduler</h3>

In [None]:
def get_lr_scheduler(optimizer):
    """Build a per-epoch LR schedule: linear ramp-up, optional plateau, then decay.

    The target learning rate for each epoch is given by ``lrfn``; the decay
    shape is chosen by ``CFG.lr_mode`` (``'exp'``, ``'step'`` or ``'cos'``).

    Args:
        optimizer: optimizer whose parameter groups are scheduled.

    Returns:
        A ``torch.optim.lr_scheduler.LambdaLR`` that sets every parameter
        group's learning rate to ``lrfn(epoch)``.

    Raises:
        ValueError: if a parameter group has a base learning rate of 0, or
            (when the decay phase is reached) if ``CFG.lr_mode`` is unknown.
    """
    lr_start, lr_max, lr_min = 5e-5, 8e-6 * CFG.batch_size, 1e-5
    lr_ramp_ep, lr_sus_ep, lr_decay = 3, 0, 0.75

    def lrfn(epoch):  # Absolute learning rate wanted at `epoch`
        if epoch < lr_ramp_ep:
            return (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start
        if epoch < lr_ramp_ep + lr_sus_ep:
            return lr_max

        decay_epoch_index = epoch - lr_ramp_ep - lr_sus_ep
        if CFG.lr_mode == 'exp':
            return (lr_max - lr_min) * lr_decay**decay_epoch_index + lr_min
        if CFG.lr_mode == 'step':
            return lr_max * lr_decay**(decay_epoch_index // 2)
        if CFG.lr_mode == 'cos':
            decay_total_epochs = CFG.epochs - lr_ramp_ep - lr_sus_ep + 3
            phase = math.pi * decay_epoch_index / decay_total_epochs
            return (lr_max - lr_min) * 0.5 * (1 + math.cos(phase)) + lr_min
        raise ValueError(
            f"Unknown CFG.lr_mode {CFG.lr_mode!r}; expected 'exp', 'step' or 'cos'."
        )

    def as_multiplier(base_lr):
        # LambdaLR *multiplies* the group's initial lr by the lambda's return
        # value, so the absolute rate must be divided by that initial lr.
        if base_lr == 0:
            raise ValueError("Optimizer base learning rate must be non-zero.")
        return lambda epoch: lrfn(epoch) / base_lr

    # LambdaLR uses 'initial_lr' as the base when it is already present.
    base_lrs = [
        group.get("initial_lr", group["lr"]) for group in optimizer.param_groups
    ]
    return torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=[as_multiplier(base_lr) for base_lr in base_lrs]
    )

# Adam over all model parameters (both branches), with a base lr of 1e-3,
# and the epoch-level scheduler attached to it.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = get_lr_scheduler(optimizer)

<h3>Training</h3>

In [None]:
def train_epoch(model, train_ds, optimizer, loss_fn_head, loss_fn_aux_head, device):
    """Run one optimization pass over ``train_ds``.

    Args:
        model: network called as ``model(images, features)`` and returning a
            dict with ``"head"`` and ``"aux_head"`` outputs.
        train_ds: iterable of ``(inp, (labels, aux_labels))`` batches, where
            ``inp`` has ``"images"`` and ``"features"`` entries.
        optimizer: optimizer stepped once per batch.
        loss_fn_head: loss for the main head, called as ``(y_true, y_pred)``.
        loss_fn_aux_head: loss for the auxiliary head, same calling convention.
        device: device the batch tensors are moved to.

    The total loss is ``loss_head + 0.3 * loss_aux_head``. The loss of the last
    batch is printed at the end (nothing is printed for an empty loader).
    """
    model.train()
    last_loss = None
    for inp, (labels, aux_labels) in tqdm(train_ds):
        optimizer.zero_grad()
        labels = labels.to(device)
        aux_labels = aux_labels.to(device)
        images = inp["images"].to(device)
        features = inp["features"].to(device)

        outputs = model(images, features)
        # Ground truth goes first: R2Loss.forward is (y_true, y_pred) and
        # normalizes by the spread of its *first* argument.
        loss_head = loss_fn_head(labels, outputs["head"])
        loss_aux_head = loss_fn_aux_head(aux_labels, outputs["aux_head"])
        loss = loss_head + 0.3 * loss_aux_head
        loss.backward()
        optimizer.step()
        last_loss = loss.item()

    if last_loss is not None:
        print(f"Loss: {last_loss:.4f}")

# Initialize best R2 and best epoch (best_epoch is 1-based once set)
best_R2 = -np.inf
best_epoch = -1

# Initialize loss functions
loss_fn_head = R2Loss().to(device)
loss_fn_aux_head = R2Loss().to(device)

# Holds the profiler of the most recent epoch when CFG.profile is enabled
prof = None

# Training loop
for epoch in range(CFG.epochs):
    print(f"Epoch: {epoch+1} | LR: {optimizer.param_groups[0]['lr']:0.10f}")

    try:
        if CFG.profile:
            with torch.profiler.profile(activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA], record_shapes=True) as prof:
                with torch.profiler.record_function("model_inference"):
                    train_epoch(model, train_ds, optimizer, loss_fn_head, loss_fn_aux_head, device)
        else:
            train_epoch(model, train_ds, optimizer, loss_fn_head, loss_fn_aux_head, device)
    except RuntimeError as e:
        if str(e).startswith("mat1 and mat2 shapes cannot be multiplied"):
            # Translate the shape mismatch into a hint about the config value,
            # keeping the original error chained for the full traceback.
            raise ValueError(f"Wrong output size for the imagenet in config (CFG.inet_output_size): should be {str(e).split('x')[1].split(' ')[0]} but is {CFG.inet_output_size}.") from e
        # Anything else is re-raised untouched (original type and traceback).
        raise

    # Update learning rate
    scheduler.step()

    # Calculate R2 score on validation set: a single pass over the loader
    # collects predictions and the matching main targets together.
    model.eval()
    val_preds, val_labels = [], []
    with torch.no_grad():
        for inp, targets in tqdm(valid_ds):
            batch_out = model(inp["images"].to(device), inp["features"].to(device))
            val_preds.append(batch_out["head"])
            # targets is (labels, aux_labels); the "head" output predicts labels.
            val_labels.append(targets[0].to(device))
    preds = torch.cat(val_preds)
    labels = torch.cat(val_labels)

    # R2Metric.update is (y_true, y_pred): ground truth first.
    r2_metric = R2Metric().to(device)
    R2 = r2_metric(labels, preds).item()

    # Update best R2 and best epoch
    if R2 > best_R2:
        best_R2 = R2
        best_epoch = epoch+1

    print(f"R2: {R2:.4f} | Best R2: {best_R2:.4f} (epoch {best_epoch})")


# Print profiler results
if CFG.profile and prof is not None:
    print(prof.key_averages().table(sort_by="cuda_time_total"))

<h3>Results</h3>

In [None]:
# Summary of the best validation score, framed by two 28-character rules.
header_rule = "#" * 10 + " Result " + "#" * 10
footer_rule = "#" * 28
summary_lines = [
    header_rule,
    f"Best R2: {best_R2:.5f}",
    f"Best Epoch: {best_epoch}",
    footer_rule,
]
for summary_line in summary_lines:
    print(summary_line)

In [None]:
# Test: scale the features with the scaler fitted on the training split and
# build an unshuffled, unaugmented loader so predictions stay in row order.
test_paths = test_df.image_path.values
test_features = scaler.transform(test_df[CFG.feature_names].values)
test_ds = build_dataset(test_paths, test_features, batch_size=CFG.batch_size,
                        shuffle=False, augment=False)

# Make sure the dropout layers are disabled, whatever state the previous
# cells left the model in.
model.eval()

preds = []
with torch.no_grad():
    for inp in tqdm(test_ds):
        batch_preds = model(inp["images"].to(device), inp["features"].to(device))["head"]
        # Keep the results on the CPU so they do not pile up in GPU memory.
        preds.append(batch_preds.cpu())

# Concatenate all predictions
preds = torch.cat(preds)

In [None]:
# Output columns are the target names without their "_mean" suffix.
target_cols = [name.replace("_mean", "") for name in CFG.class_names]

# One row per test id, followed by one column per predicted target.
pred_values = pd.DataFrame(preds.tolist(), columns=target_cols, index=test_df.index)
pred_df = pd.concat([test_df[["id"]], pred_values], axis=1)

pred_df.to_csv("submission.csv", index=False)
pred_df.head()