In [None]:
%%capture
!gdown --id 15AKhteDXWrdYl4VZBO66uW9vV-95GL4s
!unzip -qq /content/soil_data.zip

In [None]:
%%capture
!pip install --upgrade timm
!pip install pandarallel
!pip install pytorch_lightning
!pip install --upgrade segmentation_models_pytorch

In [None]:
%%capture
import pandas as pd
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from torchmetrics import Accuracy, MeanSquaredError
import datetime
from sklearn.model_selection import StratifiedKFold, KFold

import glob
from tqdm import tqdm
import cv2
from PIL import Image
import os
import timm
from matplotlib import pyplot as plt
import argparse
import albumentations as A
from albumentations.pytorch import ToTensorV2
from pandarallel import pandarallel

pandarallel.initialize(use_memory_fs=False, nb_workers=os.cpu_count(), progress_bar=True)

In [3]:
timm.list_models('*resnet*t*', pretrained=True)

['eca_resnet33ts.ra2_in1k',
 'ecaresnet26t.ra2_in1k',
 'ecaresnet50t.a1_in1k',
 'ecaresnet50t.a2_in1k',
 'ecaresnet50t.a3_in1k',
 'ecaresnet50t.ra2_in1k',
 'ecaresnetlight.miil_in1k',
 'gcresnet33ts.ra2_in1k',
 'gcresnet50t.ra2_in1k',
 'inception_resnet_v2.tf_ens_adv_in1k',
 'inception_resnet_v2.tf_in1k',
 'lambda_resnet26rpt_256.c1_in1k',
 'lambda_resnet26t.c1_in1k',
 'lambda_resnet50ts.a1h_in1k',
 'resnet10t.c3_in1k',
 'resnet14t.c3_in1k',
 'resnet18.fb_ssl_yfcc100m_ft_in1k',
 'resnet18.fb_swsl_ig1b_ft_in1k',
 'resnet18.tv_in1k',
 'resnet26.bt_in1k',
 'resnet26d.bt_in1k',
 'resnet26t.ra2_in1k',
 'resnet32ts.ra2_in1k',
 'resnet33ts.ra2_in1k',
 'resnet34.bt_in1k',
 'resnet34.tv_in1k',
 'resnet50.bt_in1k',
 'resnet50.fb_ssl_yfcc100m_ft_in1k',
 'resnet50.fb_swsl_ig1b_ft_in1k',
 'resnet50.tv2_in1k',
 'resnet50.tv_in1k',
 'resnet101.tv2_in1k',
 'resnet101.tv_in1k',
 'resnet152.tv2_in1k',
 'resnet152.tv_in1k',
 'resnetaa50d.sw_in12k_ft_in1k',
 'resnetaa101d.sw_in12k_ft_in1k',
 'resnetblur50

In [5]:
args = dict(
    seed=42,
    model='resnet10t',
    lr=1e-3,
    weight_decay=0.0,
    log_dir='logs',
    num_workers=min(8, os.cpu_count()),
    epochs=100,
#   dropout=0.2,
    batch_size=32,
    repeats=1,
    accumulate_grad_batches=1,
    image_size=77,
    base_size=128,
    gpus='0',
    patience=10,
    precision='32',
    scheduler='onecycle',
)

args = argparse.Namespace(**args)

## Load the data

In [6]:
gt = pd.read_csv('train_gt.csv')
gt

Unnamed: 0,sample_index,P,K,Mg,pH
0,0,45.1,188.0,179.0,7.2
1,1,44.8,205.0,188.0,7.0
2,2,44.4,207.0,145.0,6.8
3,3,46.5,204.0,143.0,6.8
4,4,52.0,212.0,167.0,6.7
...,...,...,...,...,...
1727,1727,40.8,133.0,132.0,6.2
1728,1728,42.7,192.0,126.0,6.5
1729,1729,39.4,180.0,122.0,6.5
1730,1730,37.3,162.0,127.0,6.5


In [7]:
train = pd.DataFrame(glob.glob('train_data/*.npz'), columns=['filepath'])
train['sample_index'] = train.filepath.apply(lambda x: x.split(os.sep)[-1].split('.npz')[0]).astype('int')

train = train.sort_values('sample_index', ignore_index=True)
train = train[['sample_index', 'filepath']]

train = train.merge(gt, on='sample_index', how='left')
train

Unnamed: 0,sample_index,filepath,P,K,Mg,pH
0,0,train_data/0.npz,45.1,188.0,179.0,7.2
1,1,train_data/1.npz,44.8,205.0,188.0,7.0
2,2,train_data/2.npz,44.4,207.0,145.0,6.8
3,3,train_data/3.npz,46.5,204.0,143.0,6.8
4,4,train_data/4.npz,52.0,212.0,167.0,6.7
...,...,...,...,...,...,...
1727,1727,train_data/1727.npz,40.8,133.0,132.0,6.2
1728,1728,train_data/1728.npz,42.7,192.0,126.0,6.5
1729,1729,train_data/1729.npz,39.4,180.0,122.0,6.5
1730,1730,train_data/1730.npz,37.3,162.0,127.0,6.5


In [8]:
test = pd.DataFrame(glob.glob('test_data/*.npz'), columns=['filepath'])
test['sample_index'] = test.filepath.apply(lambda x: x.split(os.sep)[-1].split('.npz')[0]).astype('int')

test = test.sort_values('sample_index', ignore_index=True)
test = test[['sample_index', 'filepath']]

test

Unnamed: 0,sample_index,filepath
0,0,test_data/0.npz
1,1,test_data/1.npz
2,2,test_data/2.npz
3,3,test_data/3.npz
4,4,test_data/4.npz
...,...,...
1149,1149,test_data/1149.npz
1150,1150,test_data/1150.npz
1151,1151,test_data/1151.npz
1152,1152,test_data/1152.npz


In [9]:
train[['P', 'K', 'Mg', 'pH']].values

array([[ 45.1, 188. , 179. ,   7.2],
       [ 44.8, 205. , 188. ,   7. ],
       [ 44.4, 207. , 145. ,   6.8],
       ...,
       [ 39.4, 180. , 122. ,   6.5],
       [ 37.3, 162. , 127. ,   6.5],
       [ 29.5, 146. , 133. ,   6.3]])

In [10]:
def extract_data(file_name):
    with np.load(file_name) as npz:
        arr = np.ma.MaskedArray(**npz)
        data = arr.data
        mask = arr.mask
    return {'hsi': data, 'mask': mask}

In [None]:
%%time
train_data = train.filepath.parallel_apply(extract_data)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=109), Label(value='0 / 109'))), HB…

CPU times: user 4.42 s, sys: 5.79 s, total: 10.2 s
Wall time: 10.8 s


In [None]:
train[['hsi', 'mask']] = pd.DataFrame(train_data.tolist())

In [None]:
%%time
test_data = test.filepath.parallel_apply(extract_data)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=73), Label(value='0 / 73'))), HBox…

CPU times: user 2.78 s, sys: 4.31 s, total: 7.1 s
Wall time: 7.6 s


In [None]:
test[['hsi', 'mask']] = pd.DataFrame(test_data.tolist())

In [None]:
train.describe()

Unnamed: 0,sample_index,P,K,Mg,pH
count,1732.0,1732.0,1732.0,1732.0,1732.0
mean,865.5,70.302656,227.98851,159.281236,6.782719
std,500.12965,29.504784,61.891989,39.871829,0.260298
min,0.0,20.3,21.1,26.8,5.6
25%,432.75,51.175,189.0,134.0,6.6
50%,865.5,65.1,216.0,155.0,6.8
75%,1298.25,82.225,252.0,181.0,6.9
max,1731.0,325.0,625.0,400.0,7.8


In [None]:
train.head(2)

Unnamed: 0,sample_index,filepath,P,K,Mg,pH,hsi,mask
0,0,train_data/0.npz,45.1,188.0,179.0,7.2,"[[[487, 471, 460, 489, 493, 464, 466, 459, 447...","[[[True, True, True, True, True, True, True, T..."
1,1,train_data/1.npz,44.8,205.0,188.0,7.0,"[[[573, 539, 634, 588, 655, 519, 498, 508, 531...","[[[True, True, True, True, True, False, False,..."


In [None]:
train['hsi'].map(np.max).describe()

count     1732.000000
mean      2859.618360
std       1481.411535
min       1077.000000
25%       1734.000000
50%       2737.500000
75%       3665.250000
max      17488.000000
Name: hsi, dtype: float64

In [None]:
test['hsi'].map(np.max).describe()

count     1154.000000
mean      2922.344887
std       1606.327735
min       1064.000000
25%       1741.000000
50%       2816.500000
75%       3714.750000
max      16886.000000
Name: hsi, dtype: float64

### NOTE: **Divide by baseline solution (sample mean)**

In [None]:
label_columns = ['P', 'K', 'Mg', 'pH']

train[label_columns] = train[label_columns] / train[label_columns].mean(axis=0)

In [None]:
class HsiDataset(Dataset):
    def __init__(self, df, transforms=None, repeats=1):
        self.repeats = repeats
        self.ids = df['sample_index'].tolist()
        self.file_paths = df['filepath'].tolist()
        self.hsi = df['hsi'].tolist()
        self.masks = df['mask'].tolist()

        self.transforms = transforms
        if 'P' in df.columns:
            self.labels = df[['P', 'K', 'Mg', 'pH']].values

    def __len__(self):
        return len(self.ids) * self.repeats

    def __getitem__(self, idx):
        idx = idx % len(self.ids)
        _id = self.ids[idx]
        img = (self.hsi[idx]/10000).astype('float32')
        mask = 1 - self.masks[idx].astype('int')
        if self.transforms is not None:
            aug_img = self.transforms(image=img.transpose(1,2,0), mask=mask.transpose(1,2,0))
            img = aug_img['image'].transpose(2,0,1)
            mask = aug_img['mask'].transpose(2,0,1)

        sample = dict(
            image_id=_id,
            image=(img * mask).astype('float32'),
            mask=mask,
        )

        if hasattr(self, 'labels'):
            sample['label'] = self.labels[idx].astype('float32')
        return sample

In [None]:
%%time
dst = HsiDataset(train)

CPU times: user 872 µs, sys: 0 ns, total: 872 µs
Wall time: 860 µs


In [None]:
sample = dst[123]

In [None]:
sample['image'].dtype, sample['mask'].dtype

(dtype('float32'), dtype('int64'))

In [None]:
sample['image'].shape, sample['mask'].shape

((150, 11, 11), (150, 11, 11))

In [None]:
def get_transforms(train=False, image_size=11, base_size=60, debug=False):
    augmentations = []
    augmentations.append(
        A.Resize(height=image_size, width=image_size)
    )
    if train:
        augmentations.extend([
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.Rotate(p=0.5, ),
            A.Transpose(p=0.5),
            A.MultiplicativeNoise(multiplier=(0.99, 1.01), per_channel=True, elementwise=True),
        ])

    transforms = A.Compose(augmentations)
    return transforms

In [None]:
dst = HsiDataset(train, transforms=get_transforms())

In [None]:
sample = dst[123]

In [None]:
from functools import wraps
import gc

def flush_and_gc(f):
    @wraps(f)
    def g(*args, **kwargs):
        torch.cuda.empty_cache()
        gc.collect()
        return f(*args, **kwargs)
    return g

In [None]:
class HsiModel(pl.LightningModule):
    def __init__(self, args):
        super().__init__()
        self.save_hyperparameters()

        self.net = timm.create_model(
            model_name=args.model,
            in_chans=150,
            num_classes=4,
            pretrained=True,
#             dropout=args.dropout,
            global_pool='avgmax',
        )
        self.softplus = nn.Softplus()
        self.criterion = nn.MSELoss()

        self._metric = MeanSquaredError(squared=False)

    def forward(self, image):
        return self.softplus(self.net(image))

    def training_step(self, batch, batch_idx=0):
        image = batch['image']
        label = batch['label']

        pred = self(image)

        loss = self.criterion(pred, label)
        rmse = self._metric(pred, label)
        metrics = {'rmse': rmse}

        self.log('lr', self.optimizers().param_groups[0]['lr'], prog_bar=True)
        self.log('loss', loss, batch_size=image.shape[0], prog_bar=True, on_step=True)
        self.log_dict(metrics, batch_size=image.shape[0], prog_bar=True, on_step=False, on_epoch=True)

        return loss

    def validation_step(self, batch, batch_idx=0):
        image = batch['image']
        label = batch['label']

        pred = self(image)
        loss = self.criterion(pred, label)
        rmse = self._metric(pred, label)
        metrics = {'val_loss': loss, 'val_rmse': rmse}

        self.log_dict(metrics, batch_size=image.shape[0], prog_bar=True, on_epoch=True)

    def predict_step(self, batch, batch_idx=0):
        image = batch['image']
        sample_id = batch['image_id']

        pred = self(image)
        return {'image_id': sample_id, 'pred': pred}

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=self.hparams.args.lr, weight_decay=self.hparams.args.weight_decay)
        scheduler_name = self.hparams.args.scheduler
        if scheduler_name == 'onecycle':
            scheduler = optim.lr_scheduler.OneCycleLR(optimizer, pct_start=0.1, total_steps=self.trainer.estimated_stepping_batches, max_lr=self.hparams.args.lr)
            interval = 'step'
        elif scheduler_name == 'plateau':
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=self.hparams.args.patience//2)
            interval = 'epoch'

        scheduler = {
            'scheduler': scheduler,
            'interval': interval,
            'monitor': 'val_rmse'
        }
        return {"optimizer": optimizer, "lr_scheduler": scheduler}

In [None]:
train['fold'] = -1
skf = KFold(n_splits=5, shuffle=True, random_state=args.seed)
for fold, (t_idx, v_idx) in enumerate(skf.split(range(len(train)))):
    train.loc[v_idx, 'fold'] = fold

In [None]:
train.fold.value_counts()

fold
0    347
1    347
2    346
3    346
4    346
Name: count, dtype: int64

In [None]:
import gc
import inspect
import shutil
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks.progress.tqdm_progress import TQDMProgressBar


def remove_dir(path):
    try:
        shutil.rmtree(path)
    except:
        pass


def free_memory(to_delete: list):
    calling_namespace = inspect.currentframe().f_back

    for _var in to_delete:
        calling_namespace.f_locals.pop(_var, None)
        gc.collect()
        torch.cuda.empty_cache()


def get_callbacks(args, fold=None):
    start_name = ""
    if fold is not None:
        start_name = f"fold{fold}-"

    checkpoint_callback = pl.callbacks.ModelCheckpoint(
          filename=start_name + "{epoch}-{val_loss:0.4f}-{val_rmse:0.4f}",
          monitor='val_rmse',
          verbose=False,
          save_last=False,
          save_top_k=1,
          mode='min',
          save_weights_only=True
    )

    early_stop_callback = pl.callbacks.EarlyStopping(
        monitor="val_rmse",
        patience=args.patience,
        verbose=True,
        mode='min',
        strict=True,
        check_finite=True,
        check_on_train_epoch_end=False
    )
    prog_rate = TQDMProgressBar(refresh_rate=10) #20)

    return [
        checkpoint_callback,
        early_stop_callback,
        prog_rate,
    ]

In [None]:
try:
    torch.set_float32_matmul_precision('high')
except:
    pass

In [None]:
# initial reproducibility
pl.seed_everything(args.seed, workers=True)
os.environ["CUBLAS_WORKSPACE_CONFIG"]=":4096:2"

Global seed set to 42


In [None]:
date_time = datetime.datetime.now().strftime("%m%d-%H%M")

name = args.model
version = name + '_' + date_time

# SET LOGGER
tb_logger = pl.loggers.TensorBoardLogger(
    save_dir=args.log_dir,
    name=name,
    version=version,
)

print('\n')
print(args)
print(version)
print('\n')

checkpoint_paths = []
fold_scores = []

for fold in range(5):
    print(f"\n#####Starting fold {fold}#####\n")

    # make reproducible
    pl.seed_everything(args.seed, workers=True)

    # call the model here
    model = HsiModel(args)

    # dataloaders
    dst_train=HsiDataset(
        df=train[train['fold']!=fold].reset_index(drop=True),
        transforms=get_transforms(train=True, image_size=args.image_size, base_size=args.base_size),
        repeats=args.repeats,
    )
    dst_val= HsiDataset(
        df=train[train['fold']==fold].reset_index(drop=True),
        transforms=get_transforms(train=False, image_size=args.image_size, base_size=args.base_size),
        repeats=1,
    )

    train_loader = DataLoader(
        dst_train, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True, drop_last=True)
    val_loader = DataLoader(
        dst_val, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=False)

    # Initialize simple Callbacks
    callbacks = get_callbacks(args, fold=fold)

    # Initialize a trainer
    trainer = pl.Trainer(
        callbacks=callbacks,
        accelerator='gpu',
        devices=[int(t) for t in args.gpus.split(',')],
        max_epochs=args.epochs,
        logger=tb_logger,
        num_sanity_val_steps=0,
        accumulate_grad_batches=args.accumulate_grad_batches,
        precision=args.precision,
    )

    # Train the model
    trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

    # include the paths in the list
    checkpoint_path = trainer.checkpoint_callback.best_model_path
    score = trainer.early_stopping_callback.best_score

    checkpoint_paths.append(checkpoint_path)
    fold_scores.append(score)

    free_memory([model, trainer])

Global seed set to 42




Namespace(seed=42, model='resnet10t', lr=0.001, weight_decay=0.0, log_dir='logs', num_workers=8, epochs=100, batch_size=32, repeats=1, accumulate_grad_batches=1, image_size=77, base_size=128, gpus='0', patience=10, precision='32', scheduler='onecycle')
resnet10t_1003-1304



#####Starting fold 0#####



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.
  rank_zero_warn(

  | Name      | Type             | Params
-----------------------------------------------
0 | net       | ResNet           | 5.0 M 
1 | softplus  | Softplus         | 0     
2 | criterion | MSELoss          | 0     
3 | _metric   | MeanSquaredError | 0     
-----------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
19.825    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved. New best score: 0.600


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.137 >= min_delta = 0.0. New best score: 0.463


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.121 >= min_delta = 0.0. New best score: 0.342


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.035 >= min_delta = 0.0. New best score: 0.307


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.030 >= min_delta = 0.0. New best score: 0.276


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.011 >= min_delta = 0.0. New best score: 0.265


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.018 >= min_delta = 0.0. New best score: 0.247


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.247


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.244


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.243


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.242


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.242. Signaling Trainer to stop.
Global seed set to 42



#####Starting fold 1#####



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.
  rank_zero_warn(

  | Name      | Type             | Params
-----------------------------------------------
0 | net       | ResNet           | 5.0 M 
1 | softplus  | Softplus         | 0     
2 | criterion | MSELoss          | 0     
3 | _metric   | MeanSquaredError | 0     
-----------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
19.825    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved. New best score: 0.534


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.055 >= min_delta = 0.0. New best score: 0.479


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.093 >= min_delta = 0.0. New best score: 0.386


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.057 >= min_delta = 0.0. New best score: 0.329


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.031 >= min_delta = 0.0. New best score: 0.298


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.298


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.016 >= min_delta = 0.0. New best score: 0.282


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.280


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.008 >= min_delta = 0.0. New best score: 0.273


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.271


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.271


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.269


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.268


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.266


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.266. Signaling Trainer to stop.
Global seed set to 42



#####Starting fold 2#####



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.
  rank_zero_warn(

  | Name      | Type             | Params
-----------------------------------------------
0 | net       | ResNet           | 5.0 M 
1 | softplus  | Softplus         | 0     
2 | criterion | MSELoss          | 0     
3 | _metric   | MeanSquaredError | 0     
-----------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
19.825    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved. New best score: 0.544


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.038 >= min_delta = 0.0. New best score: 0.506


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.163 >= min_delta = 0.0. New best score: 0.343


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.044 >= min_delta = 0.0. New best score: 0.299


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.013 >= min_delta = 0.0. New best score: 0.286


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.284


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.278


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.009 >= min_delta = 0.0. New best score: 0.269


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.264


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.263


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.261


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.260


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.260


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.256


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.256


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.254


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.254. Signaling Trainer to stop.
Global seed set to 42



#####Starting fold 3#####



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.
  rank_zero_warn(

  | Name      | Type             | Params
-----------------------------------------------
0 | net       | ResNet           | 5.0 M 
1 | softplus  | Softplus         | 0     
2 | criterion | MSELoss          | 0     
3 | _metric   | MeanSquaredError | 0     
-----------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
19.825    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved. New best score: 0.572


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.023 >= min_delta = 0.0. New best score: 0.549


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.122 >= min_delta = 0.0. New best score: 0.427


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.073 >= min_delta = 0.0. New best score: 0.354


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.027 >= min_delta = 0.0. New best score: 0.327


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.021 >= min_delta = 0.0. New best score: 0.306


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.010 >= min_delta = 0.0. New best score: 0.296


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.006 >= min_delta = 0.0. New best score: 0.290


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.003 >= min_delta = 0.0. New best score: 0.287


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.000 >= min_delta = 0.0. New best score: 0.286


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.282


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.281


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.277


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.275


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.275. Signaling Trainer to stop.
Global seed set to 42



#####Starting fold 4#####



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.
  rank_zero_warn(

  | Name      | Type             | Params
-----------------------------------------------
0 | net       | ResNet           | 5.0 M 
1 | softplus  | Softplus         | 0     
2 | criterion | MSELoss          | 0     
3 | _metric   | MeanSquaredError | 0     
-----------------------------------------------
5.0 M     Trainable params
0         Non-trainable params
5.0 M     Total params
19.825    Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved. New best score: 0.560


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.076 >= min_delta = 0.0. New best score: 0.484


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.112 >= min_delta = 0.0. New best score: 0.372


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.068 >= min_delta = 0.0. New best score: 0.304


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.021 >= min_delta = 0.0. New best score: 0.283


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.017 >= min_delta = 0.0. New best score: 0.266


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.004 >= min_delta = 0.0. New best score: 0.262


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.002 >= min_delta = 0.0. New best score: 0.260


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.006 >= min_delta = 0.0. New best score: 0.254


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.253


Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.001 >= min_delta = 0.0. New best score: 0.252


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric val_rmse improved by 0.005 >= min_delta = 0.0. New best score: 0.247


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric val_rmse did not improve in the last 10 records. Best score: 0.247. Signaling Trainer to stop.


In [None]:
fold_scores

[tensor(0.2419, device='cuda:0'),
 tensor(0.2663, device='cuda:0'),
 tensor(0.2545, device='cuda:0'),
 tensor(0.2747, device='cuda:0'),
 tensor(0.2474, device='cuda:0')]

# **SUBMISSION:**

In [None]:
os.makedirs('submissions', exist_ok=True)

In [None]:
class PredictionModel(pl.LightningModule):
    def __init__(self, ckpt_paths):
        super().__init__()
        if isinstance(ckpt_paths, str):
            ckpt_paths = [ckpt_paths]

        self.models = nn.ModuleList([
            HsiModel.load_from_checkpoint(ckpt_path, map_location='cpu') for ckpt_path in ckpt_paths
        ])

    def forward(self, image):
        out = torch.stack([model(image) for model in self.models], dim=0).mean(dim=0)
        return out

    def predict_step(self, batch, batch_idx=0):
        image_id = batch['image_id']
        image = batch['image']
        pred = self(image)
        return {'id': image_id.cpu().numpy(), 'pred': pred.cpu().numpy()}

In [None]:
checkpoint_paths

['logs/resnet10t/resnet10t_1003-1304/checkpoints/fold0-epoch=23-val_loss=0.0609-val_rmse=0.2419.ckpt',
 'logs/resnet10t/resnet10t_1003-1304/checkpoints/fold1-epoch=30-val_loss=0.0819-val_rmse=0.2663.ckpt',
 'logs/resnet10t/resnet10t_1003-1304/checkpoints/fold2-epoch=27-val_loss=0.0691-val_rmse=0.2545.ckpt',
 'logs/resnet10t/resnet10t_1003-1304/checkpoints/fold3-epoch=22-val_loss=0.0826-val_rmse=0.2747.ckpt',
 'logs/resnet10t/resnet10t_1003-1304/checkpoints/fold4-epoch=17-val_loss=0.0662-val_rmse=0.2474.ckpt']

In [None]:
dst_test = HsiDataset(test, transforms=get_transforms(train=False, image_size=args.image_size, base_size=args.base_size))
test_loader = DataLoader(dst_test, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers)

In [None]:
checkpoint_paths = [
    'logs/resnet10t/resnet10t_1003-1304/checkpoints/fold0-epoch=23-val_loss=0.0609-val_rmse=0.2419.ckpt',
    'logs/resnet10t/resnet10t_1003-1304/checkpoints/fold1-epoch=30-val_loss=0.0819-val_rmse=0.2663.ckpt',
    'logs/resnet10t/resnet10t_1003-1304/checkpoints/fold2-epoch=27-val_loss=0.0691-val_rmse=0.2545.ckpt',
    'logs/resnet10t/resnet10t_1003-1304/checkpoints/fold3-epoch=22-val_loss=0.0826-val_rmse=0.2747.ckpt',
    'logs/resnet10t/resnet10t_1003-1304/checkpoints/fold4-epoch=17-val_loss=0.0662-val_rmse=0.2474.ckpt'
]

In [None]:
model = PredictionModel(checkpoint_paths)

In [None]:
trainer = pl.Trainer(
    accelerator='gpu',
    devices=[int(t) for t in args.gpus.split(',')],
    logger=None,
    precision=args.precision,
)

# Train the model
preds = trainer.predict(model, dataloaders=test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: 0it [00:00, ?it/s]

In [None]:
label_columns

['P', 'K', 'Mg', 'pH']

In [None]:
preds1 = pd.DataFrame(preds).explode(column=['id', 'pred'], ignore_index=True)
preds1['tags'] = [label_columns]*len(preds1)
preds1 = preds1.explode(column=['tags', 'pred']).rename(columns={'pred': 'Target'})
preds1['sample_index'] = preds1['id'].astype('str') + '_' + preds1['tags']

preds1 = preds1[['sample_index', 'Target']]
preds1.head()

Unnamed: 0,sample_index,Target
0,0_P,0.99551
0,0_K,0.968869
0,0_Mg,1.045436
0,0_pH,1.030559
1,1_P,0.960749


In [None]:
preds1.to_csv(f'submissions/{version}.csv', index=False)

In [None]:
version

'resnet10t_1003-1304'