In [1]:
import random
import torch
import numpy as np


def setup_reproducibility(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(False, warn_only=True)
    torch.set_float32_matmul_precision("high")
    
SEED = 1000
setup_reproducibility(SEED)

In [2]:
from transformers import get_cosine_schedule_with_warmup
from scipy import signal
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from huggingface_hub import login, snapshot_download
from tqdm.auto import tqdm


def get_ckpt(path):
    return torch.load(path, weights_only=False)


def cuda_to_np(tensor):
    return tensor.cpu().detach().numpy()


def get_scheduler(optimizer, train_dl, epochs):
    total_training_steps = len(train_dl) * epochs
    warmup_steps = int(total_training_steps * 0.05)  # e.g. 5% warmup
    
    return get_cosine_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=warmup_steps,
        num_training_steps=total_training_steps
    )


def get_stats(tensor, p=True, r=False, minmax=False):
    if minmax:
        min, max = tensor.min(), tensor.max()
        mean, std = tensor.mean(), tensor.std()
        if p: print(f"Min: {min}, Max: {max} ,Mean: {mean}, Std: {std}")
        if r: return min, max, mean, std
    else:
        mean, std = tensor.mean(), tensor.std()
        if p: print(f"Mean: {mean}, Std: {std}")
        if r: return mean, std
    
    
def zscore(tensor, mean=None, std=None):
    if mean is None: mean = tensor.mean()
    if std is None: std = tensor.std()
    return (tensor - mean) / (std + 1e-8)


def get_model_size(model):
    print(sum(p.numel() for p in model.parameters()) / 1e6)
    

def get_index(iterable):
    return random.randint(0, len(iterable) - 1)


def get_indices(iterable, n):
    return random.sample(range(len(iterable)), n)


def split(inputs, targets, seed):
    return train_test_split(
        inputs,
        targets, 
        test_size=0.2,
        shuffle=True, 
        random_state=seed
    ) 


def show_waves(waves, dpi=100):
    """
    waves: numpy array of shape (3, N)
    Creates three separate figures that stretch wide.
    """
    N = waves.shape[1]
    t = np.arange(N)

    # Wide aspect ratio; height modest so each window fills width
    for i in range(waves.shape[0]):
        fig = plt.figure(figsize=(14, 4), dpi=dpi)  # wide figure
        ax = fig.add_subplot(111)
        ax.plot(t, waves[i], linewidth=1)
        ax.set_title(f"Wave {i+1}")
        ax.set_xlabel("Sample")
        ax.set_ylabel("Amplitude")
        ax.grid(True)
        fig.tight_layout()  # reduce margins to use width
        
    plt.show()
    
    
def hf_ds_download(hf_token, repo_id):
    login(hf_token[1:])
    return snapshot_download(repo_id, repo_type="dataset")


def get_spectra_features(X, b=False):
    """Create multi-channel features from spectra: raw, 1st derivative, 2nd derivative."""
    X_processed = np.zeros_like(X)
    # Baseline correction and SNV
    for i in tqdm(range(X.shape[0])):
        poly = np.polyfit(np.arange(X.shape[1]), X[i], 3)
        baseline = np.polyval(poly, np.arange(X.shape[1]))
        corrected_spec = X[i] - baseline
        #X_processed[i] = (corrected_spec - corrected_spec.mean()) / (corrected_spec.std() + 1e-8)
        X_processed[i] = corrected_spec
        
    # Calculate derivatives
    deriv1 = signal.savgol_filter(X_processed, window_length=11, polyorder=3, deriv=1, axis=1)
    deriv2 = signal.savgol_filter(X_processed, window_length=11, polyorder=3, deriv=2, axis=1)

    if b: return np.stack([X_processed, deriv1, deriv2], axis=1)
    return np.stack([deriv1, deriv2], axis=1)

In [3]:
import os

path = "/kaggle/input/dig-4-bio-raman-transfer-learning-challenge"
files = os.listdir(path)
[(i, files[i]) for i in range(len(files))]

[(0, 'sample_submission.csv'),
 (1, 'timegate.csv'),
 (2, 'mettler_toledo.csv'),
 (3, 'kaiser.csv'),
 (4, 'anton_532.csv'),
 (5, 'transfer_plate.csv'),
 (6, '96_samples.csv'),
 (7, 'tornado.csv'),
 (8, 'tec5.csv'),
 (9, 'metrohm.csv'),
 (10, 'anton_785.csv')]

In [4]:
def load_test_data():
    test = pd.read_csv(os.path.join(path, files[6]))

    row1 = test.columns[1:].to_numpy().copy()
    row1[-1] = "5611"
    row1 = row1.astype(np.float64)


    cols = test.columns[1:]
    test = test[cols]
    test[" 5611]"] = test[" 5611]"].str.replace('[\[\]]', '', regex=True).astype('int64')
    test = test.to_numpy()

    test = np.insert(test, 0, row1, axis=0)
    return test.reshape(-1, 2, 2048).mean(axis=1)


def get_test_data():
    inputs = load_test_data()
    
    spectra_selection = np.logical_and(
        300 <= np.array([float(one) for one in range(2048)]),
        np.array([float(one) for one in range(2048)]) <= 1942,
    )
    
    inputs = inputs[:, spectra_selection]

    wns = np.array([
        float(one) for one in range(2048)
    ])[spectra_selection]
    wavenumbers = np.arange(300, 1943)

    interpolated_data = np.array(
        [np.interp(wavenumbers, xp=wns, fp=i) for i in inputs]
    )

    normed_spectra = interpolated_data / np.max(interpolated_data)
    return normed_spectra
    

In [5]:
import numpy as np
import pandas as pd


def load_transfer_data():
    csv_path = os.path.join(path, files[5])
    df = pd.read_csv(csv_path)

    input_cols = df.columns[1:2049]
    target_cols = df.columns[2050:]

    targets  = df[target_cols].dropna().to_numpy()

    df = df[input_cols]
    df['Unnamed: 1'] = df['Unnamed: 1'].str.replace("[\[\]]", "", regex=True).astype('int64')
    df['Unnamed: 2048'] = df['Unnamed: 2048'].str.replace("[\[\]]", "", regex=True).astype('int64')

    inputs = df.to_numpy().reshape(-1, 2, 2048)
    inputs = inputs.mean(axis=1)
    
    return inputs, targets


def preprocess_transfer_data():
    inputs, targets = load_transfer_data()
    
    spectra_selection = np.logical_and(
        300 <= np.array([float(one) for one in range(2048)]),
        np.array([float(one) for one in range(2048)]) <= 1942,
    )
    
    inputs = inputs[:, spectra_selection]
    
    wns = np.array([
        float(one) for one in range(2048)
    ])[spectra_selection]
    wavenumbers = np.arange(300, 1943)
    
    interpolated_data = np.array(
        [np.interp(wavenumbers, xp=wns, fp=i) for i in inputs]
    )
    
    normed_spectra = interpolated_data / np.max(interpolated_data)
    return normed_spectra, targets

inputs, targets = preprocess_transfer_data()
inputs.shape, targets.shape

((96, 1643), (96, 3))

In [6]:
import random
import torch
from torch.utils.data import Dataset
import scipy.optimize


np_dtype_from_torch = {
    torch.float32: np.float32,
    torch.float64: np.float64,
}

class SpectralDataset(Dataset):
    def __init__(
        self,
        spectra,
        concentrations,
        dtype=None,
        spectra_mean_std=None,
        concentration_mean_std=None,
        combine_spectra_range=0.0,
        baseline_factor_bound=0.0,
        baseline_period_lower_bound=100.0,
        baseline_period_upper_bound=200.0,
        augment_slope_std=0.0,
        augment_intersept_std=0.0,
        rolling_bound=0,
        spectrum_rolling_sigma=0.0,
        augmentation_weight=0.1,
        original_datapoint_weight=1.,
    ):
        self.dtype = dtype or torch.float32
        self.combine_spectra_range = combine_spectra_range
        self.baseline_factor_bound = baseline_factor_bound
        self.augment_slope_std = augment_slope_std
        self.augment_intercept_std = augment_intersept_std
        self.baseline_period_lower_bound = baseline_period_lower_bound
        self.baseline_period_upper_bound = baseline_period_upper_bound
        self.rolling_bound = rolling_bound
        self.spectrum_rolling_sigma = spectrum_rolling_sigma
        self.augmentation_weight = torch.tensor(augmentation_weight, dtype=dtype)
        self.original_dp_weight = original_datapoint_weight

        # normalize spectra
        spectra = torch.tensor(spectra, dtype=dtype)

        if spectra_mean_std is None:
            self.s_mean = torch.mean(spectra)
            self.s_std = torch.std(spectra)
        else:
            self.s_mean, self.s_std = spectra_mean_std

        self.spectra = torch.divide(
            torch.subtract(spectra, self.s_mean),
            self.s_std,
        )

        self.dummy_wns = np.tile(
            np.arange(
                0., 1., 1. / self.spectra.shape[2],
                dtype=np_dtype_from_torch[self.dtype]
            )[None, :self.spectra.shape[2]],
            (self.spectra.shape[1], 1),
        )

        # normalize concentrations
        concentrations = torch.tensor(concentrations, dtype=dtype)
        if concentration_mean_std is None:
            self.concentration_means = torch.nanmean(concentrations, dim=0)

            self.concentration_stds = torch.maximum(
                torch.tensor(
                    [
                        torch.std(col[torch.logical_not(torch.isnan(col))])
                        for col in concentrations.T
                    ]
                ),
                torch.tensor([1e-3] * concentrations.shape[1]),
            )
        else:
            self.concentration_means = concentration_mean_std[0]
            self.concentration_stds = concentration_mean_std[1]

        self.concentrations = torch.divide(
            torch.subtract(
                concentrations,
                self.concentration_means,
            ),
            self.concentration_stds,
        )

    def pick_two(self, max_idx=None):
        max_idx = max_idx or len(self)
        return random.choices(range(max_idx), k=2)

    def __len__(self):
        return len(self.concentrations)

    def augment_spectra(self, spectra):
        if self.augment_slope_std > 0.0:

            def spectrum_approximation(x, slope, intercept):
                return (slope * x + intercept).reshape(-1, 1)[:, 0]

            slope, inter = scipy.optimize.curve_fit(
                spectrum_approximation,
                self.dummy_wns,
                spectra.reshape(-1, 1)[:, 0],
                p0=np.random.rand(2),
            )[0]

            new_slope = slope * (
                    np.random.gamma(
                        shape=1. / self.augment_slope_std,
                        scale=self.augment_slope_std,
                        size=1,
                    )
            )[0]
            new_intercept = inter * (
                1.0 + np.random.randn(1) * self.augment_intercept_std
            )[0]
            spectra += torch.tensor(
                (new_slope - slope)
            ) * self.dummy_wns + new_intercept - inter

        factor = self.baseline_factor_bound * torch.rand(size=(1,))
        offset = torch.rand(size=(1,)) * 2.0 * torch.pi
        period = self.baseline_period_lower_bound + (
            self.baseline_period_upper_bound - self.baseline_period_lower_bound
        ) * torch.rand(size=(1,))
        permutations = factor * torch.cos(
            2.0 * torch.pi / period * self.dummy_wns + offset
        )
        return self.roll_spectrum(
            spectra + permutations * spectra,
            delta=random.randint(-self.rolling_bound, self.rolling_bound),
        )

    def roll_spectrum(self, spectra, delta):
        num_spectra = spectra.shape[0]
        rolled_spectra = np.roll(spectra, delta, axis=1)
        if delta > 0:
            rolled_spectra[:, :delta] = (
                np.random.rand(num_spectra, delta) * self.spectrum_rolling_sigma + 1
            ) * rolled_spectra[:, delta:(delta + 1)]
        elif delta < 0:
            rolled_spectra[:, delta:] = (
                np.random.rand(num_spectra, -delta) * self.spectrum_rolling_sigma + 1
            ) * rolled_spectra[:, delta - 1:delta]
        return rolled_spectra

    def combine_k_items(self, indices, weights):
        return (
            # spectra
            torch.sum(
                torch.mul(weights[:, None, None], self.spectra[indices, :, :]),
                dim=0,
            ),
            # concentrations
            torch.sum(
                torch.mul(weights[:, None], self.concentrations[indices, :]),
                dim=0,
            )
        )

    def __getitem__(self, idx):
        if self.combine_spectra_range < 1e-12:
            spectrum = self.spectra[idx]
            spectrum = self.augment_spectra(spectrum)
            return {
                "spectra": spectrum,
                "concentrations": self.concentrations[idx],
                "label_weight": torch.tensor(1.0, dtype=self.dtype),
            }
        else:
            if random.random() < self.original_dp_weight:
                one_weight = 1.
                label_weight = torch.tensor(1.0, dtype=self.dtype)
            else:
                one_weight = random.uniform(0.0, self.combine_spectra_range)
                label_weight = self.augmentation_weight
            weights = torch.tensor([one_weight, (1 - one_weight)])
            # just pick two random indices
            indices = random.choices(range(len(self)), k=2)

            mixed_spectra, mixed_concentrations = self.combine_k_items(
                indices=indices,
                weights=weights,
            )
            mixed_spectra = self.augment_spectra(mixed_spectra)
            return mixed_spectra, mixed_concentrations, label_weight


config = {
    'initial_cnn_channels': 32,
    'cnn_channel_factor': 1.279574024454846,
    'num_cnn_layers': 8,
    'kernel_size': 3,
    'stride': 2,
    'activation_function': 'ELU',
    'fc_dropout': 0.10361700399831791,
    'lr': 0.001,
    'gamma': 0.9649606352621118,
    'baseline_factor_bound': 0.748262317340447,
    'baseline_period_lower_bound': 0.9703081695287203,
    'baseline_period_span': 19.79744237606427,
    'original_datapoint_weight': 0.4335003268130408,
    'augment_slope_std': 0.08171025264382692,
    'batch_size': 32,
    'fc_dims': 226,
    'rolling_bound': 2,
    'num_blocks': 2,
}

def get_dataset(inputs, targets, config, inputs_mean_std=None, targets_mean_std=None):
    return SpectralDataset(
        spectra=inputs[:, None, :],
        concentrations=targets,
        dtype=torch.float32,
        spectra_mean_std=inputs_mean_std,
        concentration_mean_std=targets_mean_std,
        combine_spectra_range=1.0,
        baseline_factor_bound=config["baseline_factor_bound"],
        baseline_period_lower_bound=config["baseline_period_lower_bound"],
        baseline_period_upper_bound=(config["baseline_period_lower_bound"] + config["baseline_period_span"]),
        augment_slope_std=config["augment_slope_std"],
        augment_intersept_std=0.0,
        rolling_bound=config["rolling_bound"],
        spectrum_rolling_sigma=0.01,
        augmentation_weight=0.1,
        original_datapoint_weight=1.,
    )

In [7]:
from torch.utils.data import DataLoader


def build_loader(
    SEED,
    ds,
    train=True,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    drop_last=True,
    pin_memory=True,
    persistent_workers=False,
):
    def seed_worker(worker_id):
        worker_seed = torch.initial_seed() % 2**32
        np.random.seed(worker_seed)
        random.seed(worker_seed)

    generator = torch.Generator()
    generator.manual_seed(SEED if train else SEED+5232)

    return DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
        drop_last=drop_last,
        persistent_workers=persistent_workers,
        worker_init_fn=seed_worker,
        generator=generator,
        #sampler=DistributedSampler(
        #    train_ds,
        #    shuffle=True,
        #    drop_last=True,
        #    seed=config.seed
        #)
    )
    
    
def return_dls(train_ds, eval_ds, train_batch_size, eval_batch_size):
    train_dl = build_loader(
        SEED,
        train_ds,
        train=True,
        batch_size=train_batch_size,
        shuffle=True,
        num_workers=0,
        drop_last=False,
        pin_memory=True,
        persistent_workers=False,
    )

    eval_dl = build_loader(
        SEED,
        eval_ds,
        train=False,
        batch_size=eval_batch_size,
        shuffle=False,
        num_workers=0,
        drop_last=False,
        pin_memory=True,
        persistent_workers=False,
    )
    
    return train_dl, eval_dl

In [8]:
import neptune


def setup_neptune():
    if not RESUME:
        neptune_run = neptune.init_run(
            project="arbaaz/kaggle-spect",
            name=MODEL_NAME,
            api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJlOGE2YjNiZS1mZGUyLTRjYjItYTg5Yy1mZWJkZTIzNzE1NmIifQ=="
        )

        neptune_run["h_parameters"] = {
            "seed": SEED,
            "model_name": MODEL_NAME,
            "optimizer_name": "nadam",
            "learning_rate": LR,
            "scheduler_name": "default",
            "weight_decay": WD,
            "num_epochs": EPOCHS,
            "batch_size": BATCH_SIZE,
        }
        if DROPOUT: neptune_run["h_parameters"] = {"dropout": DROPOUT}
        if DROP_PATH_RATE: neptune_run["h_parameters"] = {"drop_path_rate": DROP_PATH_RATE}
    else:
        neptune_run = neptune.init_run(
            project="arbaaz/crunchdao-structural-break",
            with_id=config.with_id,
            api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJlOGE2YjNiZS1mZGUyLTRjYjItYTg5Yy1mZWJkZTIzNzE1NmIifQ=="
        )

    return neptune_run

In [9]:
import torch.nn.functional as F
from torch.nn.modules.loss import _Loss
from sklearn.metrics import r2_score


def loss_fn(logits, targets):
    logits = logits.view(-1)
    targets = targets.view(-1)
    return F.mse_loss(logits, targets)


def metric_fn(logits, targets):
    preds = logits.cpu().detach().numpy()
    targets = targets.cpu().detach().numpy()
    
    dim1 = r2_score(targets[:, 0], preds[:, 0])
    dim2 = r2_score(targets[:, 1], preds[:, 1])
    dim3 = r2_score(targets[:, 2], preds[:, 2])
    
    return dim1, dim2, dim3, r2_score(targets, logits)


class MSEIgnoreNans(_Loss):
    def forward(
        self,
        input: torch.Tensor,
        target: torch.Tensor,
        weights: torch.Tensor,
    ) -> torch.Tensor:
        mask = torch.isfinite(target)
        mse = torch.mean(
            torch.mul(
                torch.square(input[mask] - target[mask]),
                torch.tile(weights[:, None], dims=(1, target.shape[1]))[mask],
            )
        )
        return torch.where(
            torch.isfinite(mse),
            mse,
            torch.tensor(0.).to(target.device),
        )

In [10]:
import math


class Identity(torch.torch.nn.Module):
    def forward(self, x):
        return x


# this is not a resnet yet
class ReZeroBlock(torch.torch.nn.Module):
    def __init__(
        self,
        in_channels,
        out_channels,
        activation_function,
        kernel_size,
        stride,
        dtype,
        norm_layer=None,
    ):
        super(ReZeroBlock, self).__init__()
        if norm_layer is None:
            norm_layer = torch.torch.nn.BatchNorm1d

        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = divmod(kernel_size, 2)[0] if stride == 1 else 0

        # does not change spatial dimension
        self.conv1 = torch.nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            bias=False,
            dtype=dtype,
        )
        self.bn1 = norm_layer(out_channels, dtype=dtype)
        # Both self.conv2 and self.downsample layers
        # downsample the input when stride != 1
        self.conv2 = torch.nn.Conv1d(
            out_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            groups=out_channels,
            bias=False,
            dtype=dtype,
            padding=self.padding,
        )
        if stride > 1:
            down_conv = torch.nn.Conv1d(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                bias=False,
                dtype=dtype,
                # groups=out_channels,
            )
        else:
            down_conv = Identity()

        self.down_sample = torch.nn.Sequential(
            down_conv,
            norm_layer(out_channels),
        )
        self.bn2 = norm_layer(out_channels, dtype=dtype)
        # does not change the spatial dimension
        self.conv3 = torch.nn.Conv1d(
            out_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            bias=False,
            dtype=dtype,
        )
        self.bn3 = norm_layer(out_channels, dtype=dtype)
        self.activation = activation_function(inplace=True)
        self.factor = torch.torch.nn.parameter.Parameter(torch.tensor(0.0, dtype=dtype))

    def next_spatial_dim(self, last_spatial_dim):
        return math.floor(
            (last_spatial_dim + 2 * self.padding - self.kernel_size)
            / self.stride + 1
        )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.activation(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.activation(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # not really the identity, but kind of
        identity = self.down_sample(x)

        return self.activation(out * self.factor + identity)


class ResNetEncoder(torch.torch.nn.Module):
    def __init__(
        self,
        spectrum_size,
        cnn_encoder_channel_dims,
        activation_function,
        kernel_size,
        stride,
        dtype,
        num_blocks,
        verbose=False,
    ):
        super(ResNetEncoder, self).__init__()

        self.spatial_dims = [spectrum_size]
        layers = []
        for in_channels, out_channels in zip(
            cnn_encoder_channel_dims[:-1],
            cnn_encoder_channel_dims[1:],
        ):
            block = ReZeroBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                activation_function=activation_function,
                kernel_size=kernel_size,
                stride=stride,
                dtype=dtype,
            )
            layers.append(block)
            self.spatial_dims.append(block.next_spatial_dim(self.spatial_dims[-1]))
            for _ in range(num_blocks - 1):
                block = ReZeroBlock(
                    in_channels=out_channels,
                    out_channels=out_channels,
                    activation_function=activation_function,
                    kernel_size=kernel_size,
                    stride=1,
                    dtype=dtype,
                )
                layers.append(block)
                self.spatial_dims.append(block.next_spatial_dim(self.spatial_dims[-1]))

        self.resnet_layers = torch.torch.nn.Sequential(*layers)
        if verbose:
            print("CNN Encoder Channel Dims: %s" % (cnn_encoder_channel_dims))
            print("CNN Encoder Spatial Dims: %s" % (self.spatial_dims))

    def forward(self, x):
        return self.resnet_layers(x)


class ReZeroNet(torch.nn.Module):
    def __init__(
        self,
        spectra_channels,
        spectra_size,
        initial_cnn_channels,
        cnn_channel_factor,
        num_cnn_layers,
        kernel_size,
        stride,
        activation_function,
        fc_dims,
        fc_dropout=0.0,
        dtype=None,
        verbose=False,
        fc_output_channels=1,
        num_blocks=1,
        **kwargs,
    ):
        super().__init__()
        self.fc_output_channels = fc_output_channels
        self.dtype = dtype or torch.float32

        activation_function = getattr(torch.nn, activation_function)

        # Setup CNN Encoder
        cnn_encoder_channel_dims = [spectra_channels] + [
            int(initial_cnn_channels * (cnn_channel_factor**idx))
            for idx in range(num_cnn_layers)
        ]
        self.cnn_encoder = ResNetEncoder(
            spectrum_size=spectra_size,
            cnn_encoder_channel_dims=cnn_encoder_channel_dims,
            activation_function=activation_function,
            kernel_size=kernel_size,
            stride=stride,
            num_blocks=num_blocks,
            dtype=dtype,
            verbose=verbose,
        )
        self.fc_dims = [
            int(
                self.cnn_encoder.spatial_dims[-1]
            ) * int(cnn_encoder_channel_dims[-1])
        ] + fc_dims

        if verbose:
            print("Fc Dims: %s" % self.fc_dims)
        fc_layers = []
        for idx, (in_dim, out_dim) in enumerate(
                zip(self.fc_dims[:-2], self.fc_dims[1:-1])
        ):
            fc_layers.append(torch.nn.Linear(in_dim, out_dim))
            fc_layers.append(torch.nn.ELU())
            fc_layers.append(torch.nn.Dropout(fc_dropout / (2 ** idx)))
        fc_layers.append(
            torch.nn.Linear(
                self.fc_dims[-2],
                self.fc_dims[-1] * self.fc_output_channels,
            ),
        )
        self.fc_net = torch.nn.Sequential(*fc_layers)
        if verbose:
            num_params = sum(p.numel() for p in self.parameters())
            print("Number of Parameters: %s" % num_params)

    def forward(self, spectra):
        embeddings = self.cnn_encoder(spectra)
        forecast = self.fc_net(embeddings.view(-1, self.fc_dims[0]))
        if self.fc_output_channels > 1:
            forecast = forecast.reshape(
                -1, self.fc_output_channels, self.fc_dims[-1]
            )
        return forecast


In [11]:
from tqdm.auto import tqdm


mse_loss_function = MSEIgnoreNans()

def train(
    model, 
    optimizer,
    device,
    scaler, 
    scheduler,
    train_dl,
    eval_dl,
    loss_fn,
    epochs,
    checkpoint_name,
    score=-float("inf"),
    neptune_run=None,
    p=True,
):  
    for epoch in tqdm(range(epochs)):
        model.train()
        total_loss = 0.0
        all_logits = []
        all_targets = []
        
        for inputs, targets, weights in train_dl:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            weights = weights.to(device, non_blocking=True)
            
            optimizer.zero_grad()
            with torch.amp.autocast(device_type=device, dtype=torch.float16, cache_enabled=True):
                logits = model(inputs)
                loss = loss_fn(logits, targets)
                            
            #loss.backward()
            #optimizer.step()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
            if neptune_run is not None:  neptune_run["lr_step"].append(scheduler.get_last_lr()[0])
            
            total_loss += loss.detach().cpu()
            all_logits.append(logits.detach().cpu())
            all_targets.append(targets.detach().cpu())
        
        all_logits = torch.cat(all_logits)
        all_targets = torch.cat(all_targets)

        one, two, three, r2 = metric_fn(all_logits, all_targets)
        total_loss = total_loss / len(train_dl)
        
        model.eval()
        eval_total_loss = 0.0
        eval_all_logits = []
        eval_all_targets = []

        for inputs, targets, weights in eval_dl:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            weights = weights.to(device, non_blocking=True)

            with torch.inference_mode():
                #with torch.amp.autocast(device_type=device, dtype=torch.float16, cache_enabled=True):
                logits = model(inputs)
                loss = loss_fn(logits, targets)

            eval_total_loss += loss.detach().cpu()
            eval_all_logits.append(logits.detach().cpu())
            eval_all_targets.append(targets.detach().cpu())
        
        eval_all_logits = torch.cat(eval_all_logits)
        eval_all_targets = torch.cat(eval_all_targets)

        eval_one, eval_two, eval_three, eval_r2 = metric_fn(eval_all_logits, eval_all_targets)
        eval_total_loss = eval_total_loss / len(eval_dl)
        
        if eval_r2 > score:
            score = eval_r2
            data = {"state_dict": model.state_dict()}
            data["epoch"] = epoch 
            data["score"] = score
            torch.save(data, f"/kaggle/working/{checkpoint_name}")
        
        if neptune_run is not None:
            neptune_run["train/loss"].append(total_loss)
            neptune_run["eval/loss"].append(eval_total_loss)
            neptune_run["train/r2"].append(r2)
            neptune_run["eval/r2"].append(eval_r2)
            neptune_run["train/one"].append(one)
            neptune_run["train/two"].append(two)
            neptune_run["train/three"].append(three)
            neptune_run["eval/one"].append(eval_one)
            neptune_run["eval/two"].append(eval_two)
            neptune_run["eval/three"].append(eval_three)
            
        if p and epoch % 5 == 0:
            print(
                f"Epoch: {epoch}, "
                f"train/loss: {total_loss:.4f}, "
                f"eval/loss: {eval_total_loss:.4f}, "
                f"train/r2: {r2:.4f}, "
                f"eval/r2: {eval_r2:.4f}, "
                f"train/one: {one:.4f}, "
                f"train/two: {two:.4f}, "
                f"train/three: {three:.4f}, "
                f"eval/one: {eval_one:.4f}, "
                f"eval/two: {eval_two:.4f}, "
                f"eval/three: {eval_three:.4f} "
            )
            
    if neptune_run is not None: neptune_run.stop()
    return score

In [12]:
import warnings#; warnings.filterwarnings("ignore")


EPOCHS = 100
WD = 1e-3
LR = 1e-4

DROPOUT = 0.5
DROP_PATH_RATE = None

device = "cuda" if torch.cuda.is_available() else "cpu"
RESUME = False

config["dtype"] = torch.float32
config["spectra_size"] = 1643
config["spectra_channels"] = 1
config["fc_dims"] = [
    config["fc_dims"],
    int(config["fc_dims"] / 2),
    3,
]

#mse_loss_function = MSEIgnoreNans()

In [13]:
from sklearn.model_selection import KFold


inputs_mean_std = []
targets_mean_std = []
scores = []
kfold = KFold(n_splits=5, shuffle=True, random_state=SEED)
splits = kfold.split(inputs)

for fold, (train_idx, eval_idx) in enumerate(splits):
    MODEL_NAME = f"resnet.paper.direct.train.fold.{fold}"
    checkpoint_name = f"paper.direct.train.fold.{fold}.pt"
    
    train_inputs = inputs[train_idx]
    train_targets = targets[train_idx]
    eval_inputs = inputs[eval_idx]
    eval_targets = targets[eval_idx]

    train_ds = get_dataset(train_inputs, train_targets, config)
    
    inputs_mean_std.append((fold, train_ds.s_mean, train_ds.s_std))
    targets_mean_std.append((fold, train_ds.concentration_means, train_ds.concentration_stds))
    
    eval_ds = get_dataset(eval_inputs, eval_targets, config, (train_ds.s_mean, train_ds.s_std), (train_ds.concentration_means, train_ds.concentration_stds))
    
    BATCH_SIZE = 32
    train_dl, eval_dl = return_dls(train_ds, eval_ds, BATCH_SIZE, len(eval_ds))
    
    #model = ResNet(input_channels=1, dropout=DROPOUT).to(device)
    model = ReZeroNet(**config).to(device)
    if fold == 0: print(get_model_size(model))
    
    #ckpt = get_ckpt("/kaggle/working/paper.pretrain.fold.3.pt")
    #model.load_state_dict(ckpt["state_dict"])
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD, foreach=True)
    scaler = torch.amp.GradScaler(device)
    scheduler = get_scheduler(optimizer, train_dl, EPOCHS)
    
    score = train(
            model, 
            optimizer, 
            device,
            scaler,
            scheduler,
            train_dl, 
            eval_dl,
            loss_fn,
            EPOCHS,
            checkpoint_name,
            neptune_run=setup_neptune(),
        )
    
    scores.append(score)

0.734309
None




[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/arbaaz/kaggle-spect/e/KAG-142


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 0, train/loss: 1.0172, eval/loss: 0.8975, train/r2: -0.0174, eval/r2: -0.1450, train/one: -0.0145, train/two: -0.0211, train/three: -0.0165, eval/one: -0.0090, eval/two: -0.0265, eval/three: -0.3995 
Epoch: 5, train/loss: 0.9550, eval/loss: 1.0531, train/r2: -0.0542, eval/r2: -0.1901, train/one: -0.0872, train/two: -0.0243, train/three: -0.0510, eval/one: -0.3751, eval/two: -0.1024, eval/three: -0.0927 
Epoch: 10, train/loss: 0.8852, eval/loss: 0.9818, train/r2: 0.0553, eval/r2: -0.1184, train/one: 0.0075, train/two: -0.0007, train/three: 0.1592, eval/one: 0.0162, eval/two: -0.2655, eval/three: -0.1058 
Epoch: 15, train/loss: 0.7898, eval/loss: 0.9467, train/r2: 0.1760, eval/r2: 0.0186, train/one: -0.0006, train/two: 0.0167, train/three: 0.5119, eval/one: -0.0052, eval/two: -0.1404, eval/three: 0.2014 
Epoch: 20, train/loss: 0.6852, eval/loss: 0.8563, train/r2: 0.2832, eval/r2: -0.0017, train/one: 0.0799, train/two: 0.0056, train/three: 0.7640, eval/one: -0.0913, eval/two: -0.13

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 0, train/loss: 1.0662, eval/loss: 0.9719, train/r2: -0.0370, eval/r2: -0.1358, train/one: -0.0619, train/two: -0.0200, train/three: -0.0290, eval/one: -0.1062, eval/two: -0.2865, eval/three: -0.0146 
Epoch: 5, train/loss: 0.9967, eval/loss: 0.8060, train/r2: -0.0066, eval/r2: -0.0233, train/one: -0.0946, train/two: 0.0366, train/three: 0.0381, eval/one: -0.0210, eval/two: -0.0227, eval/three: -0.0261 
Epoch: 10, train/loss: 0.9565, eval/loss: 0.8314, train/r2: 0.0433, eval/r2: -0.0323, train/one: -0.0366, train/two: 0.0116, train/three: 0.1549, eval/one: 0.0169, eval/two: -0.0285, eval/three: -0.0853 
Epoch: 15, train/loss: 0.8958, eval/loss: 0.7879, train/r2: 0.1576, eval/r2: 0.0651, train/one: 0.0261, train/two: 0.0364, train/three: 0.4103, eval/one: 0.1362, eval/two: -0.1639, eval/three: 0.2229 
Epoch: 20, train/loss: 0.6828, eval/loss: 1.0552, train/r2: 0.2818, eval/r2: 0.1194, train/one: 0.0795, train/two: 0.0309, train/three: 0.7349, eval/one: -0.0327, eval/two: -0.3818, e

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 0, train/loss: 0.9722, eval/loss: 1.3871, train/r2: -0.0502, eval/r2: -0.0175, train/one: -0.0565, train/two: -0.0225, train/three: -0.0717, eval/one: -0.0154, eval/two: -0.0283, eval/three: -0.0088 
Epoch: 5, train/loss: 0.8818, eval/loss: 0.9453, train/r2: 0.0544, eval/r2: -0.2008, train/one: 0.0222, train/two: -0.0020, train/three: 0.1431, eval/one: -0.2375, eval/two: -0.0686, eval/three: -0.2962 
Epoch: 10, train/loss: 0.8742, eval/loss: 1.1500, train/r2: 0.1055, eval/r2: -0.0568, train/one: -0.1785, train/two: 0.0545, train/three: 0.4406, eval/one: -0.0503, eval/two: -0.2058, eval/three: 0.0858 
Epoch: 15, train/loss: 0.7490, eval/loss: 0.8851, train/r2: 0.2426, eval/r2: -0.0534, train/one: 0.0025, train/two: -0.0267, train/three: 0.7519, eval/one: 0.0059, eval/two: -0.2729, eval/three: 0.1069 
Epoch: 20, train/loss: 0.6988, eval/loss: 0.7717, train/r2: 0.3139, eval/r2: 0.1832, train/one: 0.0799, train/two: 0.0354, train/three: 0.8263, eval/one: 0.1243, eval/two: -0.4333, e

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 0, train/loss: 1.0620, eval/loss: 1.5301, train/r2: -0.0398, eval/r2: -0.1701, train/one: -0.0475, train/two: -0.0497, train/three: -0.0222, eval/one: -0.2129, eval/two: -0.0005, eval/three: -0.2968 
Epoch: 5, train/loss: 0.8770, eval/loss: 1.2654, train/r2: -0.0471, eval/r2: -0.4644, train/one: -0.1050, train/two: -0.1523, train/three: 0.1161, eval/one: -0.1151, eval/two: -0.4687, eval/three: -0.8092 
Epoch: 10, train/loss: 0.9722, eval/loss: 1.5542, train/r2: 0.0684, eval/r2: -0.5664, train/one: -0.1025, train/two: -0.0182, train/three: 0.3259, eval/one: -1.5822, eval/two: -0.0949, eval/three: -0.0221 
Epoch: 15, train/loss: 0.7635, eval/loss: 1.2672, train/r2: 0.2363, eval/r2: 0.0639, train/one: 0.0928, train/two: -0.0154, train/three: 0.6316, eval/one: -0.0314, eval/two: -0.0280, eval/three: 0.2512 
Epoch: 20, train/loss: 0.6283, eval/loss: 0.8415, train/r2: 0.3027, eval/r2: 0.1728, train/one: 0.1165, train/two: 0.0138, train/three: 0.7778, eval/one: -0.1826, eval/two: -0.08

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 0, train/loss: 1.0808, eval/loss: 1.0260, train/r2: -0.0205, eval/r2: -0.0890, train/one: -0.0268, train/two: -0.0008, train/three: -0.0339, eval/one: -0.0003, eval/two: -0.0745, eval/three: -0.1923 
Epoch: 5, train/loss: 0.9602, eval/loss: 1.0597, train/r2: 0.0363, eval/r2: -0.4079, train/one: 0.0270, train/two: -0.0573, train/three: 0.1391, eval/one: -0.0051, eval/two: -0.4082, eval/three: -0.8103 
Epoch: 10, train/loss: 0.8727, eval/loss: 0.9772, train/r2: 0.1424, eval/r2: -0.1881, train/one: 0.0082, train/two: 0.0501, train/three: 0.3689, eval/one: -0.0086, eval/two: -0.1166, eval/three: -0.4392 
Epoch: 15, train/loss: 0.7427, eval/loss: 0.7210, train/r2: 0.2755, eval/r2: 0.0007, train/one: 0.0092, train/two: 0.1236, train/three: 0.6937, eval/one: 0.0056, eval/two: -0.1648, eval/three: 0.1613 
Epoch: 20, train/loss: 0.7057, eval/loss: 0.6411, train/r2: 0.3123, eval/r2: 0.1305, train/one: 0.0867, train/two: 0.0071, train/three: 0.8431, eval/one: -0.0727, eval/two: -0.1333, ev

In [40]:
output_dir = "/kaggle/working"
output_files = sorted(os.listdir(output_dir))

ckpt_paths = []
for f in output_files:
    if "paper.direct.train" in f and "csv" not in f:
        ckpt_path = os.path.join(output_dir, f)
        ckpt_paths.append(ckpt_path)
        ckpt = torch.load(ckpt_path, weights_only=False)
        print(ckpt_path, ckpt["epoch"], ckpt["score"])

/kaggle/working/paper.direct.train.fold.0.pt 93 0.7429719215168401
/kaggle/working/paper.direct.train.fold.1.pt 68 0.7380484356754696
/kaggle/working/paper.direct.train.fold.2.pt 95 0.6799178597928467
/kaggle/working/paper.direct.train.fold.3.pt 94 0.6614433341484437
/kaggle/working/paper.direct.train.fold.4.pt 92 0.7835582747349822


In [42]:
def get_ckpt(path):
    return torch.load(path, weights_only=False)

ckpt = get_ckpt("/kaggle/working/paper.direct.train.fold.4.pt")
ckpt["epoch"], ckpt["score"]

(92, 0.7835582747349822)

In [43]:
model = ReZeroNet(**config).to(device)
model.load_state_dict(ckpt["state_dict"])

<All keys matched successfully>

In [44]:
test_inputs = get_test_data()
test_inputs.shape, test_inputs.min(), test_inputs.max()

((96, 1643), 0.19117267486879463, 1.0)

In [101]:
class SpectralTestDataset(Dataset):
    def __init__(
        self,
        spectra,
        concentrations,
        dtype=None,
        spectra_mean_std=None,
        concentration_mean_std=None,
        combine_spectra_range=0.0,
        baseline_factor_bound=0.0,
        baseline_period_lower_bound=100.0,
        baseline_period_upper_bound=200.0,
        augment_slope_std=0.0,
        augment_intersept_std=0.0,
        rolling_bound=0,
        spectrum_rolling_sigma=0.0,
        augmentation_weight=0.1,
        original_datapoint_weight=1.,
    ):
        self.dtype = dtype or torch.float32
        self.combine_spectra_range = combine_spectra_range
        self.baseline_factor_bound = baseline_factor_bound
        self.augment_slope_std = augment_slope_std
        self.augment_intercept_std = augment_intersept_std
        self.baseline_period_lower_bound = baseline_period_lower_bound
        self.baseline_period_upper_bound = baseline_period_upper_bound
        self.rolling_bound = rolling_bound
        self.spectrum_rolling_sigma = spectrum_rolling_sigma
        self.augmentation_weight = torch.tensor(augmentation_weight, dtype=dtype)
        self.original_dp_weight = original_datapoint_weight

        # normalize spectra
        spectra = torch.tensor(spectra, dtype=dtype)

        if spectra_mean_std is None:
            self.s_mean = torch.mean(spectra)
            self.s_std = torch.std(spectra)
        else:
            self.s_mean, self.s_std = spectra_mean_std

        self.spectra = torch.divide(
            torch.subtract(spectra, self.s_mean),
            self.s_std,
        )

        self.dummy_wns = np.tile(
            np.arange(
                0., 1., 1. / self.spectra.shape[2],
                dtype=np_dtype_from_torch[self.dtype]
            )[None, :self.spectra.shape[2]],
            (self.spectra.shape[1], 1),
        )

        if False:
            # normalize concentrations
            concentrations = torch.tensor(concentrations, dtype=dtype)
            if concentration_mean_std is None:
                self.concentration_means = torch.nanmean(concentrations, dim=0)

                self.concentration_stds = torch.maximum(
                    torch.tensor(
                        [
                            torch.std(col[torch.logical_not(torch.isnan(col))])
                            for col in concentrations.T
                        ]
                    ),
                    torch.tensor([1e-3] * concentrations.shape[1]),
                )
            else:
                self.concentration_means = concentration_mean_std[0]
                self.concentration_stds = concentration_mean_std[1]

            self.concentrations = torch.divide(
                torch.subtract(
                    concentrations,
                    self.concentration_means,
                ),
                self.concentration_stds,
            )

    def pick_two(self, max_idx=None):
        max_idx = max_idx or len(self)
        return random.choices(range(max_idx), k=2)

    def __len__(self):
        return 96

    def augment_spectra(self, spectra):
        if self.augment_slope_std > 0.0:

            def spectrum_approximation(x, slope, intercept):
                return (slope * x + intercept).reshape(-1, 1)[:, 0]

            slope, inter = scipy.optimize.curve_fit(
                spectrum_approximation,
                self.dummy_wns,
                spectra.reshape(-1, 1)[:, 0],
                p0=np.random.rand(2),
            )[0]

            new_slope = slope * (
                    np.random.gamma(
                        shape=1. / self.augment_slope_std,
                        scale=self.augment_slope_std,
                        size=1,
                    )
            )[0]
            new_intercept = inter * (
                1.0 + np.random.randn(1) * self.augment_intercept_std
            )[0]
            spectra += torch.tensor(
                (new_slope - slope)
            ) * self.dummy_wns + new_intercept - inter

        factor = self.baseline_factor_bound * torch.rand(size=(1,))
        offset = torch.rand(size=(1,)) * 2.0 * torch.pi
        period = self.baseline_period_lower_bound + (
            self.baseline_period_upper_bound - self.baseline_period_lower_bound
        ) * torch.rand(size=(1,))
        permutations = factor * torch.cos(
            2.0 * torch.pi / period * self.dummy_wns + offset
        )
        return self.roll_spectrum(
            spectra + permutations * spectra,
            delta=random.randint(-self.rolling_bound, self.rolling_bound),
        )

    def roll_spectrum(self, spectra, delta):
        num_spectra = spectra.shape[0]
        rolled_spectra = np.roll(spectra, delta, axis=1)
        if delta > 0:
            rolled_spectra[:, :delta] = (
                np.random.rand(num_spectra, delta) * self.spectrum_rolling_sigma + 1
            ) * rolled_spectra[:, delta:(delta + 1)]
        elif delta < 0:
            rolled_spectra[:, delta:] = (
                np.random.rand(num_spectra, -delta) * self.spectrum_rolling_sigma + 1
            ) * rolled_spectra[:, delta - 1:delta]
        return rolled_spectra

    def combine_k_items(self, indices, weights):
        return (
            # spectra
            torch.sum(
                torch.mul(weights[:, None, None], self.spectra[indices, :, :]),
                dim=0,
            ),
            # concentrations
            #torch.sum(
            #    torch.mul(weights[:, None], self.concentrations[indices, :]),
            #    dim=0,
            #)
        )

    def __getitem__(self, idx):
        if True:#self.combine_spectra_range < 1e-12:
            spectrum = self.spectra[idx]
            #spectrum = self.augment_spectra(spectrum)
            return spectrum
        else:
            if random.random() < self.original_dp_weight:
                one_weight = 1.
                label_weight = torch.tensor(1.0, dtype=self.dtype)
            else:
                one_weight = random.uniform(0.0, self.combine_spectra_range)
                label_weight = self.augmentation_weight
            weights = torch.tensor([one_weight, (1 - one_weight)])
            # just pick two random indices
            indices = random.choices(range(len(self)), k=2)

            mixed_spectra = self.combine_k_items(
                indices=indices,
                weights=weights,
            )
            mixed_spectra = self.augment_spectra(mixed_spectra[0])
            return mixed_spectra
        
        
test_ds = SpectralTestDataset(
    spectra=test_inputs[:, None, :],
    concentrations=None,
    dtype=torch.float32,
    spectra_mean_std=inputs_mean_std[1][1:],
    concentration_mean_std=targets_mean_std[1][1:],
    combine_spectra_range=1.0,
    baseline_factor_bound=config["baseline_factor_bound"],
    baseline_period_lower_bound=config["baseline_period_lower_bound"],
    baseline_period_upper_bound=(config["baseline_period_lower_bound"] + config["baseline_period_span"]),
    augment_slope_std=config["augment_slope_std"],
    augment_intersept_std=0.0,
    rolling_bound=config["rolling_bound"],
    spectrum_rolling_sigma=0.01,
    augmentation_weight=0.1,
    original_datapoint_weight=1.,
)

test_dl = DataLoader(test_ds, batch_size=32)

In [102]:
inputs_mean_std[1][1:], targets_mean_std[1][1:]

((tensor(0.3595), tensor(0.1597)),
 (tensor([6.8227, 1.1995, 1.6005]), tensor([2.9272, 0.5459, 0.6937])))

In [103]:
def get_test_dataset(inputs, inputs_mean_std, targets_mean_std):
    return SpectralTestDataset(
        spectra=test_inputs[:, None, :],
        concentrations=None,
        dtype=torch.float32,
        spectra_mean_std=inputs_mean_std,
        concentration_mean_std=targets_mean_std,
        combine_spectra_range=1.0,
        baseline_factor_bound=config["baseline_factor_bound"],
        baseline_period_lower_bound=config["baseline_period_lower_bound"],
        baseline_period_upper_bound=(config["baseline_period_lower_bound"] + config["baseline_period_span"]),
        augment_slope_std=config["augment_slope_std"],
        augment_intersept_std=0.0,
        rolling_bound=config["rolling_bound"],
        spectrum_rolling_sigma=0.01,
        augmentation_weight=0.1,
        original_datapoint_weight=1.,
    )


In [46]:
all_preds = []
all_inputs = []
model.eval()

for i in test_dl:
    #i = data["spectra"].cuda()
    i = i.cuda()
    with torch.inference_mode():
        preds = model(i)
    all_inputs.append(i.cpu())
    all_preds.append(cuda_to_np(preds.double()))

In [47]:
preds = np.concatenate(all_preds)
preds.shape

(96, 3)

In [48]:
preds

array([[-0.12889805,  0.76554817, -0.53647536],
       [-1.30538452,  0.39789349, -0.82551134],
       [-0.39988276,  0.63025367, -1.8318367 ],
       [-1.37605441,  0.88060683, -0.07746108],
       [-0.61503583,  0.60447729, -0.24853687],
       [ 0.05681216,  0.49679908, -0.83895802],
       [-0.49156347,  0.16905291,  0.16944259],
       [-0.98279303,  0.03841651, -2.1095736 ],
       [ 0.08147585, -0.78061247, -1.70073855],
       [-0.97424752, -0.34118888, -0.12129673],
       [-0.74376577,  0.01164164, -1.81835663],
       [-0.88535959, -0.5206126 , -0.93554771],
       [ 0.11631434, -0.85268688, -0.36983517],
       [-0.72388369,  0.70626581, -0.22292535],
       [-0.18011081,  0.38986945, -0.14631838],
       [-1.30374539,  0.57094562, -1.84710085],
       [-0.39249933,  0.67446804, -0.45921585],
       [-0.44811502,  0.37662947, -0.4820644 ],
       [-0.84195536,  0.8274883 ,  0.04623342],
       [-1.14202714,  0.42112377,  0.13470052],
       [-0.86488867,  0.62372047,  0.210

In [49]:
def reverse_zscore(tensor, mu, sigma):
    return (tensor * sigma) + mu

In [50]:
mus = targets_mean_std[1][1:][0]
sigmas = targets_mean_std[1][1:][1]

for i in range(3):
    preds[:, i] = reverse_zscore(preds[:, i], mus[i].numpy(), sigmas[i].numpy())
preds

array([[6.44541702, 1.6173766 , 1.22835177],
       [3.00165989, 1.41666778, 1.02785038],
       [5.65220302, 1.54351708, 0.32977248],
       [2.79479823, 1.68018904, 1.54676541],
       [5.02241685, 1.52944532, 1.42809183],
       [6.98901944, 1.47066199, 1.01852255],
       [5.38383947, 1.29173989, 1.71804004],
       [3.94593486, 1.2204233 , 0.13710915],
       [7.06121386, 0.77330172, 0.42071396],
       [3.97094891, 1.01319039, 1.51635705],
       [4.64560448, 1.20580645, 0.33912347],
       [4.23113757, 0.91523999, 0.95151924],
       [7.16319146, 0.7339551 , 1.34394841],
       [4.7038024 , 1.58501337, 1.44585829],
       [6.29550939, 1.41228732, 1.49899979],
       [3.00645787, 1.51113982, 0.31918389],
       [5.67381548, 1.56765444, 1.28194592],
       [5.51101978, 1.4050594 , 1.26609611],
       [4.35818842, 1.65119075, 1.63257105],
       [3.4798321 , 1.42934958, 1.6939398 ],
       [4.2910591 , 1.53995049, 1.74645987],
       [5.00593553, 0.94989481, 1.40496107],
       [6.

In [51]:
get_stats(preds, minmax=True)

Min: 0.13710914923041173, Max: 9.092672578832975 ,Mean: 2.6940206574223575, Std: 2.167201817302101


In [52]:
column_names = ['Glucose', 'Sodium Acetate', 'Magnesium Sulfate']
preds_df = pd.DataFrame(preds, columns=column_names)
preds_df.insert(0, 'ID', [i+1 for i in range(len(preds_df))])
preds_df

Unnamed: 0,ID,Glucose,Sodium Acetate,Magnesium Sulfate
0,1,6.445417,1.617377,1.228352
1,2,3.001660,1.416668,1.027850
2,3,5.652203,1.543517,0.329772
3,4,2.794798,1.680189,1.546765
4,5,5.022417,1.529445,1.428092
...,...,...,...,...
91,92,6.110946,1.410194,1.166755
92,93,6.653915,1.520884,1.230389
93,94,6.135319,1.571345,0.602965
94,95,5.953444,1.607306,1.296551


In [53]:
ckpt["score"]

0.7835582747349822

In [54]:
name = "paper.direct.train.augments.at.inference.7835.csv"
preds_df.to_csv(name, index=False)
f = pd.read_csv(f"/kaggle/working/{name}")
f

Unnamed: 0,ID,Glucose,Sodium Acetate,Magnesium Sulfate
0,1,6.445417,1.617377,1.228352
1,2,3.001660,1.416668,1.027850
2,3,5.652203,1.543517,0.329772
3,4,2.794798,1.680189,1.546765
4,5,5.022417,1.529445,1.428092
...,...,...,...,...
91,92,6.110946,1.410194,1.166755
92,93,6.653915,1.520884,1.230389
93,94,6.135319,1.571345,0.602965
94,95,5.953444,1.607306,1.296551


In [100]:
import time
for i in range(4000):
    time.sleep(1)

KeyboardInterrupt: 

In [108]:
def ensemble_inference():
    test_inputs = get_test_data()
    all_preds = []

    for i, ckpt_path in enumerate(ckpt_paths):
        ckpt = get_ckpt(ckpt_path)
        
        model = ReZeroNet(**config).to(device)
        model.load_state_dict(ckpt["state_dict"])
        model.eval()

        test_ds = get_test_dataset(test_inputs, inputs_mean_std[i][1:], targets_mean_std[i][1:])
        test_dl = DataLoader(test_ds, batch_size=32)
        
        fold_preds = []
        for inputs in test_dl:
            with torch.inference_mode():
                preds = model(inputs.cuda())
                preds = cuda_to_np(preds.double())
                fold_preds.append(preds)
                
        fold_preds = np.concatenate(fold_preds)
        
        means = targets_mean_std[i][1:][0]
        stds = targets_mean_std[i][1:][1]
        for i in range(3):
            fold_preds[:, i] = reverse_zscore(fold_preds[:, i], means[i].numpy(), stds[i].numpy())
            
        all_preds.append(fold_preds)

    return np.mean(all_preds, axis=0)

preds = ensemble_inference()
preds

array([[ 3.73206862,  1.30506718,  0.46157049],
       [ 5.43849122,  1.39460257,  1.68025217],
       [ 5.02283318,  0.98982292,  0.98071207],
       [ 3.59836663,  1.42305321,  0.39213738],
       [10.14436171,  0.8331019 ,  0.98423216],
       [ 7.65942594,  1.2717677 ,  1.00165221],
       [ 6.32538089,  1.21082884,  0.3484959 ],
       [ 7.55196178,  1.31989633,  1.1601794 ],
       [ 6.94738812,  1.48646357,  1.2152102 ],
       [ 9.56747912,  0.95405478,  0.38146618],
       [ 7.6343491 ,  1.41157516,  1.3987115 ],
       [ 5.32565758,  1.50554824,  1.26598057],
       [ 5.74219535,  1.46645673,  1.51402041],
       [ 5.53060332,  1.49178362,  1.7117704 ],
       [ 6.16797656,  1.47151561,  1.3737299 ],
       [ 6.52887383,  1.47973257,  1.27042064],
       [ 5.6896512 ,  1.52098184,  1.37336475],
       [ 5.91704443,  1.51074395,  1.54322621],
       [ 6.23655636,  1.51965464,  1.53387962],
       [ 5.25693829,  1.47541854,  1.48599154],
       [ 5.95017012,  1.45495154,  1.372

In [111]:
column_names = ['Glucose', 'Sodium Acetate', 'Magnesium Sulfate']
preds_df = pd.DataFrame(preds, columns=column_names)
preds_df.insert(0, 'ID', [i+1 for i in range(len(preds_df))])
preds_df

Unnamed: 0,ID,Glucose,Sodium Acetate,Magnesium Sulfate
0,1,3.732069,1.305067,0.461570
1,2,5.438491,1.394603,1.680252
2,3,5.022833,0.989823,0.980712
3,4,3.598367,1.423053,0.392137
4,5,10.144362,0.833102,0.984232
...,...,...,...,...
91,92,4.989497,1.508321,1.594583
92,93,6.638515,0.894362,0.873203
93,94,5.175408,1.107298,0.803580
94,95,2.402599,1.464722,0.955560


In [112]:
name = "paper.direct.train.ensemble.csv"
preds_df.to_csv(name, index=False)
f = pd.read_csv(f"/kaggle/working/{name}")
f

Unnamed: 0,ID,Glucose,Sodium Acetate,Magnesium Sulfate
0,1,3.732069,1.305067,0.461570
1,2,5.438491,1.394603,1.680252
2,3,5.022833,0.989823,0.980712
3,4,3.598367,1.423053,0.392137
4,5,10.144362,0.833102,0.984232
...,...,...,...,...
91,92,4.989497,1.508321,1.594583
92,93,6.638515,0.894362,0.873203
93,94,5.175408,1.107298,0.803580
94,95,2.402599,1.464722,0.955560
