In [2]:
import numpy as np
import pandas as pd
import imageio.v3 as imageio
import albumentations as A

from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from torch import nn
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler


import torch
import timm
import torchmetrics
import time

# Register tqdm's progress-bar variants of the pandas apply methods (e.g. `progress_apply`).
# NOTE(review): no `progress_apply` call is visible in this notebook — confirm this is still needed.
tqdm.pandas()

In [3]:
class Config:
    """Static hyper-parameters shared by the cells of this notebook."""

    # --- image model / input ---
    BACKBONE = 'swin_large_patch4_window12_384.ms_in22k_ft_in1k'
    IMAGE_SIZE = 384  # side length (pixels) the transforms resize every image to

    # --- regression targets ---
    TARGET_COLUMNS = [
        'X4_mean',
        'X11_mean',
        'X18_mean',
        'X26_mean',
        'X50_mean',
        'X3112_mean',
    ]
    N_TARGETS = len(TARGET_COLUMNS)

    # --- optimisation ---
    N_EPOCHS = 6
    BATCH_SIZE = 10
    LR_MAX = 1e-4  # peak learning rate handed to the optimizer and the LR schedule
    WEIGHT_DECAY = 0.01
    # NOTE(review): TRAIN_MODEL is not read by any cell visible in this notebook.
    TRAIN_MODEL = True

    # --- outlier trimming: per-target quantile bounds applied to the training rows ---
    Lower_Quantile = 0.005
    Upper_Quantile = 0.985


CONFIG = Config()

In [4]:
class TrainDataset(Dataset):
    """Dataset yielding ``(image, tabular_row, target)`` triples.

    Args:
        X_jpeg_bytes: indexable collection of encoded images; each item is
            handed to ``imageio.imread``.
        X_tabular: indexable collection of per-sample tabular features.
        y: indexable collection of per-sample targets; its length defines the
            dataset length.
        transforms: optional callable invoked as ``transforms(image=...)`` that
            returns a mapping with an ``'image'`` entry. When ``None`` the
            decoded image is returned unchanged.
    """

    def __init__(self, X_jpeg_bytes, X_tabular, y, transforms=None):
        self.X_jpeg_bytes = X_jpeg_bytes
        self.X_tabular = X_tabular
        self.y = y
        self.transforms = transforms

    def __len__(self):
        """Number of samples, taken from the target collection."""
        return len(self.y)

    def __getitem__(self, index):
        """Decode sample ``index`` and return ``(image, tabular_row, target)``."""
        X_sample = imageio.imread(self.X_jpeg_bytes[index])
        # `transforms` defaults to None in __init__, so it must not be called
        # unconditionally.
        if self.transforms is not None:
            X_sample = self.transforms(image=X_sample)['image']
        X_tabular_sample = self.X_tabular[index]
        y_sample = self.y[index]

        return X_sample, X_tabular_sample, y_sample

In [5]:
class TabularBackbone(nn.Module):
    """Small MLP that embeds a tabular feature vector.

    Layout: Linear(n_features, 512) -> BatchNorm1d -> GELU -> Linear(512, out_features).
    ``out_features`` is also stored on the instance so downstream modules can
    size their own input layer.
    """

    def __init__(self, n_features, out_features):
        super().__init__()
        hidden_dim = 512
        self.out_features = out_features
        layers = [
            nn.Linear(n_features, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, out_features),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return the embedding produced by the MLP for the batch ``x``."""
        embedding = self.fc(x)
        return embedding
    
class ImageBackbone(nn.Module):
    """timm image model initialised from a fine-tuned checkpoint, plus a projection head.

    Args:
        backbone_name: timm model name passed to ``timm.create_model``.
        weight_path: path to a pickled object exposing a ``.backbone`` module
            whose ``state_dict()`` matches the timm model created here.
        out_features: size of the embedding produced by ``forward``.
        fixed_feature_extractor: when True, every backbone parameter gets
            ``requires_grad = False`` so only the new head is trained.
    """

    def __init__(self, backbone_name, weight_path, out_features, fixed_feature_extractor=False):
        super().__init__()
        self.out_features = out_features
        self.backbone = timm.create_model(backbone_name, pretrained=False, num_classes=CONFIG.N_TARGETS)
        # map_location='cpu': the checkpoint loads even on a host without the
        # device it was saved from; the caller moves the assembled model later.
        # weights_only=False: the file holds a whole pickled module (we read
        # `.backbone` from it), which the restricted loader would reject.
        # SECURITY: unpickling executes arbitrary code — only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(weight_path, map_location='cpu', weights_only=False).backbone
        self.backbone.load_state_dict(checkpoint.state_dict())
        if fixed_feature_extractor:
            for param in self.backbone.parameters():
                param.requires_grad = False
            # NOTE(review): the frozen backbone still follows model.train()/eval();
            # confirm whether any stochastic layers inside it should stay active.
        in_features = self.backbone.num_features

        # Drop the backbone's own head; pooling and projection happen in self.head.
        self.backbone.head = nn.Identity()
        self.head = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(in_features, out_features),
        )

    def forward(self, x):
        """Return the projected embedding for the image batch ``x``."""
        x = self.backbone(x)
        # Move the last axis to position 1 before pooling — assumes the backbone
        # emits a 4-D channels-last map (batch, H, W, C); TODO confirm for other
        # backbone families.
        x = x.permute(0, 3, 1, 2)
        return self.head(x)

class Model(nn.Module):
    """Late-fusion regressor: image and tabular embeddings are concatenated and fed to an MLP head.

    Both backbones must expose an ``out_features`` attribute; the head's input
    width is their sum. The head is
    Linear -> BatchNorm1d -> GELU (1024), Linear -> BatchNorm1d -> GELU (256),
    then Linear to ``out_features``.
    """

    def __init__(self, img_backbone, tab_backbone, out_features:int):
        super().__init__()
        self.img_backbone = img_backbone
        self.tab_backbone = tab_backbone

        width = self.tab_backbone.out_features + self.img_backbone.out_features
        layers = []
        for hidden_dim in (1024, 256):
            layers.extend([
                nn.Linear(width, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.GELU(),
            ])
            width = hidden_dim
        layers.append(nn.Linear(width, out_features))
        self.fc = nn.Sequential(*layers)

    def forward(self, img, tab):
        """Embed both inputs, concatenate as [image, tabular] along dim 1, and regress."""
        fused = torch.cat(
            (self.img_backbone(img), self.tab_backbone(tab)),
            dim=1,
        )
        return self.fc(fused)

In [6]:
class AverageMeter(object):
    """Running mean over every element passed to ``update`` since the last ``reset``."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the accumulated sum, element count and average."""
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val):
        """Fold a tensor of values into the running statistics.

        Args:
            val: tensor whose elements are added to the running sum; its
                ``numel()`` is added to the element count.
        """
        # Detach first: accumulating a tensor that still requires grad would make
        # `self.sum` part of the autograd graph and chain every step's history
        # onto it for as long as the meter lives.
        val = val.detach()
        self.sum += val.sum()
        self.count += val.numel()
        self.avg = self.sum / self.count

In [7]:
def get_lr_scheduler(optimizer):
    """Build the one-cycle learning-rate schedule for ``optimizer``.

    The peak rate is ``CONFIG.LR_MAX`` and the schedule spans
    ``CONFIG.N_STEPS`` scheduler steps, so ``CONFIG.N_STEPS`` must be set
    before this is called. Warm-up takes the first 10% of the steps, with
    cosine annealing and both division factors set to 10.
    """
    schedule_kwargs = dict(
        max_lr=CONFIG.LR_MAX,
        total_steps=CONFIG.N_STEPS,
        pct_start=0.1,
        anneal_strategy='cos',
        div_factor=1e1,
        final_div_factor=1e1,
    )
    scheduler_cls = torch.optim.lr_scheduler.OneCycleLR
    return scheduler_cls(optimizer=optimizer, **schedule_kwargs)

In [8]:
# Load the training and test tables from CSV.
# Later cells read an 'id' column from both frames and, from `train` only, the
# target columns listed in CONFIG.TARGET_COLUMNS.
train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')

In [9]:
# Trim outliers target by target. Each pass works on the rows that survived the
# previous passes, so the quantiles of later targets are computed on already
# filtered data. Note that re-running this cell trims `train` again.
for column in CONFIG.TARGET_COLUMNS:
    lower_quantile, upper_quantile = train[column].quantile(
        [CONFIG.Lower_Quantile, CONFIG.Upper_Quantile]
    )
    # `between` is inclusive on both ends by default.
    train = train[train[column].between(lower_quantile, upper_quantile)]

In [10]:
# Tabular model inputs: everything except the identifier (and, for the training
# frame, the regression targets).
tabular = train.drop(columns=['id', *CONFIG.TARGET_COLUMNS])
test_tabular = test.drop(columns=['id'])

In [11]:
# Targets that are modelled in log10 space (the inference cell undoes this with 10 ** v).
LOG_FEATURES = ['X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']

# One column per target, in CONFIG.TARGET_COLUMNS order, stored as float32.
y_train = np.column_stack([
    np.log10(train[target].values) if target in LOG_FEATURES else train[target].values
    for target in CONFIG.TARGET_COLUMNS
]).astype(np.float32)

In [12]:
# Standardise the tabular inputs; the statistics are fitted on the training
# rows only and then reused for the test rows.
X_SCALER = StandardScaler().fit(tabular)
tabular_scaled = X_SCALER.transform(tabular).astype(np.float32)
test_tabular_scaled = X_SCALER.transform(test_tabular).astype(np.float32)

# Standardise the targets as well; Y_SCALER is kept so predictions can be
# mapped back with inverse_transform.
Y_SCALER = StandardScaler().fit(y_train)
y_train_scaled = Y_SCALER.transform(y_train).astype(np.float32)

In [13]:
def _read_file_bytes(file_path):
    """Return the raw bytes of ``file_path``, closing the handle as soon as it is read."""
    # A bare `open(...).read()` leaves closing to the garbage collector; with one
    # call per image that can hold many descriptors open at once.
    with open(file_path, 'rb') as fh:
        return fh.read()


print('JPEG Files Processing:')
# Keep the encoded JPEG bytes in memory; decoding happens per sample in the dataset.
train['file_path'] = train['id'].apply(lambda s: f'./data/train_images/{s}.jpeg')
train['jpeg_bytes'] = train['file_path'].apply(_read_file_bytes)

test['file_path'] = test['id'].apply(lambda s: f'./data/test_images/{s}.jpeg')
test['jpeg_bytes'] = test['file_path'].apply(_read_file_bytes)
print('JPEG Files Processing End')

JPEG Files Processing:
JPEG Files Processing End


In [14]:
# Derived training-length bookkeeping, stored on the CONFIG instance.
CONFIG.N_TRAIN_SAMPLES = tabular_scaled.shape[0]
# Floor division: an incomplete final batch is not counted as a step.
CONFIG.N_STEPS_PER_EPOCH = CONFIG.N_TRAIN_SAMPLES // CONFIG.BATCH_SIZE
# One extra step on top of epochs * steps-per-epoch for the LR schedule length.
CONFIG.N_STEPS = 1 + CONFIG.N_EPOCHS * CONFIG.N_STEPS_PER_EPOCH

In [15]:
# Per-channel normalisation statistics, expressed for pixel values in [0, 1].
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]


def _to_normalized_tensor():
    """Return fresh instances of the final steps shared by both pipelines."""
    return [
        A.ToFloat(),
        A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
        ToTensorV2(),
    ]


# Training pipeline: random flip / crop / photometric jitter, then the shared tail.
TRAIN_TRANSFORMS = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomSizedCrop(
        [96, 128],
        CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, w2h_ratio=1.0, p=0.8),
    # The crop is applied with p=0.8, so resize unconditionally afterwards.
    A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
    A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.3),
    A.ImageCompression(quality_lower=85, quality_upper=100, p=0.3),
    *_to_normalized_tensor(),
])

# Test pipeline: deterministic resize followed by the shared tail.
TEST_TRANSFORMS = A.Compose([
    A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
    *_to_normalized_tensor(),
])

In [17]:
# train / test split
# Share of the (trimmed) training rows used for fitting. 1 keeps every row, so
# `test_idx` is empty and no hold-out set exists. The step counts stored on
# CONFIG are derived from len(tabular_scaled) and would need adjusting if this
# is lowered.
TRAIN_FRACTION = 1
# Fixed seed so the permutation (and any hold-out split) is reproducible across
# kernel restarts.
SPLIT_SEED = 42

split_rng = np.random.default_rng(SPLIT_SEED)
train_idx = split_rng.choice(len(train), int(TRAIN_FRACTION * len(train)), replace=False)
test_idx = np.setdiff1d(np.arange(len(train)), train_idx)

# Positional indexing keeps images, tabular rows and targets aligned.
train_images = train['jpeg_bytes'].values[train_idx]
train_tabular = tabular_scaled[train_idx]
train_y = y_train_scaled[train_idx]

# The competition test set is used in its original order.
test_images = test['jpeg_bytes'].values
test_tabular = test_tabular_scaled

0


In [18]:
# Training data: augmented images, shuffled, with the incomplete last batch dropped.
train_dataset = TrainDataset(
    X_jpeg_bytes=train_images,
    X_tabular=train_tabular,
    y=train_y,
    transforms=TRAIN_TRANSFORMS,
)
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    num_workers=0,
)

# Test data: the `y` slot carries the sample ids so each prediction can be
# matched to its row; one sample per batch, original order.
test_dataset = TrainDataset(
    X_jpeg_bytes=test['jpeg_bytes'].values,
    X_tabular=test_tabular,
    y=test['id'].values,
    transforms=TEST_TRANSFORMS,
)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    drop_last=False,
    num_workers=0,
)

In [19]:
# Prefer the GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [20]:
# Image branch: frozen fine-tuned backbone projected to a 384-d embedding.
# The architecture name comes from CONFIG.BACKBONE rather than a second
# hard-coded copy, so the two cannot drift apart.
img_backbone = ImageBackbone(
    CONFIG.BACKBONE,
    './finetuned_test_model.pth',
    384,
    fixed_feature_extractor=True,
)
# Tabular branch: one input per scaled feature column, 128-d embedding.
tab_backbone = TabularBackbone(n_features=tabular_scaled.shape[1], out_features=128)

model = Model(img_backbone, tab_backbone, CONFIG.N_TARGETS)
model = model.to(device)

  checkpoint = torch.load(weight_path).backbone


In [21]:
# Running metrics, placed on the same device as the model outputs they consume.
MAE = torchmetrics.regression.MeanAbsoluteError().to(device)
R2 = torchmetrics.regression.R2Score(num_outputs=CONFIG.N_TARGETS, multioutput='uniform_average').to(device)
# Running mean of the training loss.
LOSS = AverageMeter()

# Per-target mean of `y_train` (the targets before standard scaling) and a small epsilon.
# NOTE(review): Y_MEAN and EPS are not referenced by any later cell in this
# notebook — presumably left over from a custom r2 loss; confirm before removing.
Y_MEAN = torch.tensor(y_train).mean(dim=0).to(device)
EPS = torch.tensor([1e-6]).to(device)

In [22]:
# Training objective (smooth L1 between scaled predictions and scaled targets).
LOSS_FN = nn.SmoothL1Loss()

# AdamW over all model parameters; the scheduler created below drives the
# learning rate during training.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=CONFIG.LR_MAX,
    weight_decay=CONFIG.WEIGHT_DECAY,
)

LR_SCHEDULER = get_lr_scheduler(optimizer)

In [24]:
print("Start Training:")
# Place the model once; it stays on `device` for the whole run instead of being
# re-sent every step and bounced through the CPU at every checkpoint.
model = model.to(device)
for epoch in range(CONFIG.N_EPOCHS):
    # Metrics and the running loss are reported per epoch.
    MAE.reset()
    R2.reset()
    LOSS.reset()
    model.train()

    for step, (X_image, X_tabular, y_true) in enumerate(train_dataloader):
        X_image = X_image.to(device)
        X_tabular = X_tabular.to(device)
        y_true = y_true.to(device)
        t_start = time.perf_counter_ns()
        y_pred = model(X_image, X_tabular)
        loss = LOSS_FN(y_pred, y_true)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        LR_SCHEDULER.step()

        # Log from detached tensors so the meters never hold on to autograd history.
        LOSS.update(loss.detach())
        MAE.update(y_pred.detach(), y_true)
        R2.update(y_pred.detach(), y_true)

        print(
            f'\rEPOCH {epoch + 1:02d}, {step + 1:04d}/{CONFIG.N_STEPS_PER_EPOCH} | ' +
            f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
            f'step: {(time.perf_counter_ns() - t_start) * 1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}',
            end='\n' if (step + 1) == CONFIG.N_STEPS_PER_EPOCH else '', flush=True,
        )

    # save model
    # Copy the weights to CPU inside the state dict only, so the checkpoint is
    # device-independent while the live model stays where it is. Overwriting
    # values for existing keys keeps the state dict's type and metadata.
    checkpoint_state = model.state_dict()
    for param_name, param_value in checkpoint_state.items():
        checkpoint_state[param_name] = param_value.detach().cpu()
    torch.save(checkpoint_state, './test_model.pth')

Start Training:
EPOCH 01, 3627/3842 | loss: 0.1613, mae: 0.4283, r2: 0.6170, step: 0.654s, lr: 9.90e-055

In [24]:
# Predict with the model currently in memory (no checkpoint is reloaded here).
model.to(device)
model.eval()

SUBMISSION_ROWS = []

with torch.no_grad():
    for X_image, X_tabular, test_id in tqdm(test_dataloader):
        y_pred = model(X_image.to(device), X_tabular.to(device)).cpu().numpy()

        # Undo the target standardisation; one row per sample in the batch.
        y_pred = Y_SCALER.inverse_transform(y_pred)

        # Iterate over the batch instead of assuming exactly one sample, so this
        # cell keeps working if the test loader's batch size is raised.
        # assumes `test_id` is collated into a tensor of numeric ids — TODO confirm
        for sample_id, sample_pred in zip(test_id.tolist(), y_pred):
            row = {'id': int(sample_id)}

            for k, v in zip(CONFIG.TARGET_COLUMNS, sample_pred):
                # Targets trained in log10 space are mapped back with 10 ** v.
                row[k.replace('_mean', '')] = 10 ** v if k in LOG_FEATURES else v

            SUBMISSION_ROWS.append(row)

submission_df = pd.DataFrame(SUBMISSION_ROWS)
submission_df.to_csv('./submission.csv', index=False)
print("Submit!")

  0%|          | 0/6391 [00:00<?, ?it/s]

Submit!
