In [1]:
import os
# Expose only GPU 0 to this process; set here, before torch is imported in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
import random
import torch
import numpy as np


def setup_reproducibility(seed):
    """Seed every RNG used by the notebook and set the cuDNN / matmul flags.

    Args:
        seed: Integer seed given to ``random``, NumPy and PyTorch (CPU, and all
            CUDA devices when CUDA is available).
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # cuDNN: no autotuning, deterministic kernels only.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # NOTE(review): the first argument is False, so PyTorch's global
    # deterministic-algorithms mode stays OFF here -- confirm this is intended.
    torch.use_deterministic_algorithms(False, warn_only=True)
    torch.set_float32_matmul_precision("high")
    
# Global seed: applied to all RNGs here and read again by return_dls when building loaders.
SEED = 1000
setup_reproducibility(SEED)

In [3]:
import os
from collections import OrderedDict
from transformers import get_cosine_schedule_with_warmup
from scipy import signal
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from huggingface_hub import login, snapshot_download
from tqdm.auto import tqdm


def average_state_dicts(state_dict_list):
    """Average several state dicts key by key.

    Args:
        state_dict_list: Non-empty list of state dicts; every dict must contain
            the keys of the first one.

    Returns:
        A new ``OrderedDict`` (inputs are not modified) holding, for each key of
        the first dict, the mean over all dicts. The mean is computed in float32
        and cast back to the dtype of the first dict's entry.
    """
    reference = state_dict_list[0]
    count = len(state_dict_list)

    averaged = OrderedDict()
    for key, ref_value in reference.items():
        # Accumulate in float32 so integer entries do not overflow.
        total = 0
        for state in state_dict_list:
            total = total + state[key].float()
        averaged[key] = (total / count).to(dtype=ref_value.dtype)

    return averaged


def get_ckpt_paths(output_dir, keyword):
    """List checkpoint files in ``output_dir`` and print their epoch and score.

    A file qualifies when its name contains ``keyword`` and does not contain
    ``"csv"``. Each qualifying file is loaded once so that its ``"epoch"`` and
    ``"score"`` entries can be printed.

    Returns:
        Paths of the qualifying files, in sorted file-name order.
    """
    selected = [
        os.path.join(output_dir, fname)
        for fname in sorted(os.listdir(output_dir))
        if keyword in fname and "csv" not in fname
    ]

    for ckpt_path in selected:
        # weights_only=False unpickles arbitrary objects: only use on trusted files.
        ckpt = torch.load(ckpt_path, weights_only=False)
        print(ckpt_path, ckpt["epoch"], ckpt["score"])

    return selected


def rest(t=4000):
    """Block for ``t`` seconds, sleeping one second at a time."""
    import time

    for _ in range(t):
        time.sleep(1)
        
        
def generate_csv(preds, name):
    """Write predictions to a CSV file with a 1-based ``ID`` column.

    Args:
        preds: Rows of three values, in the order Glucose, Sodium Acetate,
            Magnesium Sulfate.
        name: Destination path of the CSV file.
    """
    frame = pd.DataFrame(
        preds,
        columns=['Glucose', 'Sodium Acetate', 'Magnesium Sulfate'],
    )
    # IDs start at 1 and follow row order.
    frame.insert(0, 'ID', list(range(1, len(frame) + 1)))
    frame.to_csv(name, index=False)
    
    
def get_ckpt(path):
    """Load and return the object stored at ``path`` with ``torch.load``."""
    # weights_only=False unpickles arbitrary objects: only use on trusted files.
    checkpoint = torch.load(path, weights_only=False)
    return checkpoint


def cuda_to_np(tensor):
    """Return ``tensor`` as a NumPy array, detached from autograd and on the CPU."""
    detached = tensor.detach()
    return detached.cpu().numpy()


def get_scheduler(optimizer, train_dl, epochs):
    """Build a cosine learning-rate schedule with a 5% warmup.

    Args:
        optimizer: Optimizer whose learning rate is scheduled.
        train_dl: Sized iterable of training batches; ``len(train_dl)`` is the
            number of steps per epoch.
        epochs: Number of training epochs.

    Returns:
        The scheduler produced by ``get_cosine_schedule_with_warmup``.
    """
    n_total_steps = len(train_dl) * epochs
    n_warmup_steps = int(n_total_steps * 0.05)  # first 5% of all steps

    scheduler = get_cosine_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=n_warmup_steps,
        num_training_steps=n_total_steps,
    )
    return scheduler


def get_stats(tensor, p=True, r=False, minmax=False):
    """Print and/or return summary statistics of ``tensor``.

    Args:
        tensor: Object exposing ``mean()``, ``std()`` and, when ``minmax`` is
            set, ``min()`` and ``max()``.
        p: Print the statistics.
        r: Return the statistics.
        minmax: Include minimum and maximum.

    Returns:
        ``(min, max, mean, std)`` or ``(mean, std)`` when ``r`` is true,
        otherwise ``None``.
    """
    mean, std = tensor.mean(), tensor.std()

    if not minmax:
        if p: print(f"Mean: {mean}, Std: {std}")
        return (mean, std) if r else None

    lowest, highest = tensor.min(), tensor.max()
    if p: print(f"Min: {lowest}, Max: {highest}, Mean: {mean}, Std: {std}")
    return (lowest, highest, mean, std) if r else None
    
    
def zscore(tensor, mean=None, std=None):
    """Standardise ``tensor`` as ``(tensor - mean) / (std + 1e-8)``.

    ``mean`` and ``std`` default to the statistics of ``tensor`` itself.
    """
    centre = tensor.mean() if mean is None else mean
    spread = tensor.std() if std is None else std
    # The small constant guards against division by zero.
    return (tensor - centre) / (spread + 1e-8)


def reverse_zscore(tensor, mu, sigma):
    """Undo a z-score transform: returns ``tensor * sigma + mu``."""
    rescaled = tensor * sigma
    return rescaled + mu


def get_model_size(model):
    """Print the number of parameters of ``model`` in millions."""
    n_params = 0
    for param in model.parameters():
        n_params += param.numel()
    print(n_params / 1e6)
    

def get_index(iterable):
    """Return a random valid index into ``iterable`` (which must support ``len``)."""
    last_index = len(iterable) - 1
    return random.randint(0, last_index)


def get_indices(iterable, n):
    """Return ``n`` distinct random indices into ``iterable``."""
    candidates = range(len(iterable))
    return random.sample(candidates, n)


def split(inputs, targets, seed):
    """Shuffle and split inputs/targets 80/20 with ``train_test_split``.

    Returns:
        The four-element result of ``train_test_split(inputs, targets, ...)``.
    """
    options = {
        "test_size": 0.2,
        "shuffle": True,
        "random_state": seed,
    }
    return train_test_split(inputs, targets, **options)


def show_waves(waves, dpi=100):
    """Plot every row of ``waves`` in its own wide figure.

    Args:
        waves: NumPy array of shape (n_waves, N); one figure is created per row.
        dpi: Resolution passed to each figure.
    """
    sample_axis = np.arange(waves.shape[1])

    for row in range(waves.shape[0]):
        # Wide and short, so each plot uses the full width.
        fig = plt.figure(figsize=(14, 4), dpi=dpi)
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(sample_axis, waves[row], linewidth=1)
        ax.set_title(f"Wave {row+1}")
        ax.set_xlabel("Sample")
        ax.set_ylabel("Amplitude")
        ax.grid(True)
        fig.tight_layout()  # trim margins

    plt.show()
    
    
def hf_ds_download(hf_token, repo_id):
    """Log in to the Hugging Face Hub and download a dataset snapshot.

    Args:
        hf_token: Token string whose FIRST character is discarded before login
            (callers pass the token with a one-character prefix).
        repo_id: Dataset repository to download.

    Returns:
        The value returned by ``snapshot_download`` for the dataset repo.
    """
    token_without_prefix = hf_token[1:]
    login(token_without_prefix)
    return snapshot_download(repo_id, repo_type="dataset")


def get_spectra_features(X, b=False):
    """Build derivative feature channels from baseline-corrected spectra.

    For every row of ``X`` a cubic polynomial is least-squares fitted over the
    sample index and subtracted (baseline correction; no SNV scaling is
    applied). First and second Savitzky-Golay derivatives (window 11,
    polynomial order 3) of the corrected rows are then computed along axis 1.

    Args:
        X: 2-D NumPy array with one spectrum per row.
        b: If true, the baseline-corrected spectra are included as the first
            channel.

    Returns:
        Array of shape ``(n, 3, L)`` -- corrected, 1st derivative, 2nd
        derivative -- when ``b`` is true, otherwise ``(n, 2, L)`` with the two
        derivatives only.
    """
    positions = np.arange(X.shape[1])  # loop-invariant x-axis for the fit

    # Floating inputs keep their dtype; anything else (e.g. integers) is
    # promoted to float64 so the corrected values are not truncated on store.
    out_dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64
    X_processed = np.zeros_like(X, dtype=out_dtype)

    for i in tqdm(range(X.shape[0])):
        coeffs = np.polyfit(positions, X[i], 3)
        X_processed[i] = X[i] - np.polyval(coeffs, positions)

    # Savitzky-Golay derivatives along the spectral axis
    deriv1 = signal.savgol_filter(X_processed, window_length=11, polyorder=3, deriv=1, axis=1)
    deriv2 = signal.savgol_filter(X_processed, window_length=11, polyorder=3, deriv=2, axis=1)

    if b: return np.stack([X_processed, deriv1, deriv2], axis=1)
    return np.stack([deriv1, deriv2], axis=1)

In [4]:
import os

# Disabled alternative data source: list the Kaggle input directory.
if False:
    path = "/kaggle/input/dig-4-bio-raman-transfer-learning-challenge"
    files = os.listdir(path)
    for position, fname in enumerate(files):
        print((position, fname))

In [5]:
if True:
    # Never commit access tokens to the notebook: read the token from the
    # HF_TOKEN environment variable (a KeyError here means it is not set).
    # hf_ds_download discards the first character of the string it receives,
    # so a one-character placeholder is prepended to keep the real token intact.
    hf_token = "x" + os.environ["HF_TOKEN"]
    path = hf_ds_download(hf_token, "ArbaazBeg/kaggle-spectogram")
    files = os.listdir(path)

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

In [6]:
import pandas as pd


def load_comp_data(filepath, is_train=True):
    """Load a Raman spectroscopy CSV and return cleaned spectra and targets.

    Args:
        filepath: Path of the CSV file.
        is_train: If true, the file has a header row, its last four columns are
            removed from the spectra, and the three named target columns are
            returned. If false, the file has no header and no targets.

    Returns:
        ``(X, y)``. ``X`` is a DataFrame whose first column is ``sample_id``
        (forward-filled and stripped; converted to int after removing the text
        ``'sample'`` when ``is_train`` is false) followed by spectral columns
        named ``"0"``, ``"1"``, ... with brackets removed and values parsed as
        numbers (unparseable entries become NaN). ``y`` is a NumPy array of the
        target rows that contain no NaN, or ``None`` when ``is_train`` is false.
        Note that ``y`` can therefore have fewer rows than ``X``.
    """
    if is_train:
        df = pd.read_csv(filepath)
        target_cols = ['Glucose (g/L)', 'Sodium Acetate (g/L)', 'Magnesium Acetate (g/L)']
        y = df[target_cols].dropna().values

        # Drop the last 4 columns (analyte info and targets). Copy so that the
        # column edits below act on an independent frame, not a slice of df.
        X = df.iloc[:, :-4].copy()
    else:
        X = pd.read_csv(filepath, header=None)
        y = None

    X.columns = ["sample_id"] + [str(i) for i in range(X.shape[1] - 1)]

    # The id is only present on some rows; propagate it downwards, then clean.
    sample_ids = X['sample_id'].ffill().str.strip()
    if not is_train:
        sample_ids = sample_ids.str.replace('sample', '').astype(int)
    X['sample_id'] = sample_ids

    # Spectral values may carry list brackets from the export; strip and parse.
    for col in X.columns[1:]:
        as_text = X[col].astype(str).str.replace('[', '', regex=False).str.replace(']', '', regex=False)
        X[col] = pd.to_numeric(as_text, errors='coerce')

    return X, y


def fix_val_test_shape(X):
    """Crop 2048-point spectra to 300-1942 and resample onto an integer grid.

    The 2048 columns of ``X`` are taken to lie on ``np.linspace(65, 3350, 2048)``.
    Columns whose position falls inside [300, 1942] are kept and every row is
    linearly interpolated onto the integer positions 300, 301, ..., 1942.

    Args:
        X: 2-D NumPy array with 2048 columns.

    Returns:
        Array with one row per input row and 1643 columns.
    """
    lower_wns, upper_wns = 300, 1942
    target_grid = np.arange(lower_wns, upper_wns + 1)
    measured_axis = np.linspace(65, 3350, 2048)

    in_window = (measured_axis >= lower_wns) & (measured_axis <= upper_wns)
    window_axis = measured_axis[in_window]
    cropped = X[:, in_window]

    resampled = [np.interp(target_grid, xp=window_axis, fp=row) for row in cropped]
    return np.array(resampled)

In [7]:
# Load the labelled transfer-plate spectra (with targets) and the 96-sample file (no targets).
inputs, targets = load_comp_data(os.path.join(path, 'transfer_plate.csv'), is_train=True)
test_inputs, _ = load_comp_data(os.path.join(path, '96_samples.csv'), is_train=False)

In [8]:
# Drop the id column and average every two consecutive 2048-point rows into one spectrum.
# NOTE: this cell rebinds `inputs` / `test_inputs` from DataFrames to arrays, so it
# can only be re-run after re-running the loading cell above.
inputs = inputs.drop(columns='sample_id').to_numpy().reshape(-1, 2, 2048).mean(axis=1)
test_inputs = test_inputs.drop(columns='sample_id').to_numpy().reshape(-1, 2, 2048).mean(axis=1)

# Crop and resample onto the shared integer grid.
inputs, test_inputs = fix_val_test_shape(inputs), fix_val_test_shape(test_inputs)

# Version 2 Update: Normalise Val and Test data like Train
# (each array is divided by its own global maximum).
inputs = inputs / inputs.max()
test_inputs = test_inputs / test_inputs.max()

In [9]:
# Sanity check: shapes of the prepared spectra, targets and test spectra.
inputs.shape, targets.shape, test_inputs.shape

((96, 1643), (96, 3), (96, 1643))

In [10]:
import numpy as np
import random
import torch
from torch.utils.data import Dataset
import scipy.optimize


# NumPy counterparts of the torch dtypes supported by SpectralDataset; used to
# build the NumPy position grid in the same precision as the tensors.
np_dtype_from_torch = {
    torch.float32: np.float32,
    torch.float64: np.float64,
}

class SpectralDataset(Dataset):
    """Standardised spectra and concentrations with on-the-fly augmentation.

    Spectra are standardised with one global mean/std; concentrations are
    standardised per column, ignoring NaNs. Every item is a triple
    ``(spectrum, concentrations, label_weight)`` where ``spectrum`` is a NumPy
    array (it comes out of ``np.roll``) and the other two are tensors.

    When ``combine_spectra_range`` is at least ``1e-12`` the requested index is
    ignored: each item is built from two randomly drawn samples.

    Args:
        spectra: Array-like of shape (n_samples, n_channels, n_points).
        concentrations: Array-like of shape (n_samples, n_targets); may hold NaN.
        dtype: ``torch.float32`` (default) or ``torch.float64``.
        spectra_mean_std: Optional ``(mean, std)`` to use instead of the
            statistics of ``spectra``.
        concentration_mean_std: Optional ``(means, stds)`` to use instead of the
            per-column statistics of ``concentrations``.
        combine_spectra_range: Below ``1e-12`` disables mixing; otherwise the
            upper bound of the random weight given to the first mixed sample.
        baseline_factor_bound: Upper bound of the random amplitude of the
            multiplicative cosine modulation.
        baseline_period_lower_bound: Lower bound of the random cosine period,
            measured on the 0..1 position grid.
        baseline_period_upper_bound: Upper bound of that period.
        augment_slope_std: When positive, a line is fitted to the spectrum and
            its slope is rescaled by a Gamma(1/std, std) random factor.
        augment_intersept_std: Relative std of the Gaussian factor applied to
            the fitted intercept (parameter spelling kept for compatibility).
        rolling_bound: Maximum absolute shift, in points, of the random roll.
        spectrum_rolling_sigma: Scale of the uniform noise used to fill the
            points vacated by the roll.
        augmentation_weight: Label weight returned for mixed items.
        original_datapoint_weight: Probability that a mixing-mode item is a
            single unmixed sample (label weight 1).
    """

    def __init__(
        self,
        spectra,
        concentrations,
        dtype=None,
        spectra_mean_std=None,
        concentration_mean_std=None,
        combine_spectra_range=0.0,
        baseline_factor_bound=0.0,
        baseline_period_lower_bound=100.0,
        baseline_period_upper_bound=200.0,
        augment_slope_std=0.0,
        augment_intersept_std=0.0,
        rolling_bound=0,
        spectrum_rolling_sigma=0.0,
        augmentation_weight=0.1,
        original_datapoint_weight=1.,
    ):
        # Resolve the dtype once and use the resolved value everywhere, so the
        # default (dtype=None) builds float32 tensors that match dummy_wns.
        self.dtype = dtype or torch.float32
        self.combine_spectra_range = combine_spectra_range
        self.baseline_factor_bound = baseline_factor_bound
        self.augment_slope_std = augment_slope_std
        self.augment_intercept_std = augment_intersept_std
        self.baseline_period_lower_bound = baseline_period_lower_bound
        self.baseline_period_upper_bound = baseline_period_upper_bound
        self.rolling_bound = rolling_bound
        self.spectrum_rolling_sigma = spectrum_rolling_sigma
        self.augmentation_weight = torch.tensor(augmentation_weight, dtype=self.dtype)
        self.original_dp_weight = original_datapoint_weight

        # normalize spectra
        spectra = torch.tensor(spectra, dtype=self.dtype)

        if spectra_mean_std is None:
            self.s_mean = torch.mean(spectra)
            self.s_std = torch.std(spectra)
        else:
            self.s_mean, self.s_std = spectra_mean_std

        self.spectra = torch.divide(
            torch.subtract(spectra, self.s_mean),
            self.s_std,
        )

        # Position grid 0, 1/L, 2/L, ... repeated for every channel: (C, L).
        # The slice guards against arange yielding one extra element.
        self.dummy_wns = np.tile(
            np.arange(
                0., 1., 1. / self.spectra.shape[2],
                dtype=np_dtype_from_torch[self.dtype]
            )[None, :self.spectra.shape[2]],
            (self.spectra.shape[1], 1),
        )

        # normalize concentrations
        concentrations = torch.tensor(concentrations, dtype=self.dtype)
        if concentration_mean_std is None:
            self.concentration_means = torch.nanmean(concentrations, dim=0)

            # Per-column std over the non-NaN entries, floored at 1e-3.
            self.concentration_stds = torch.maximum(
                torch.tensor(
                    [
                        torch.std(col[torch.logical_not(torch.isnan(col))])
                        for col in concentrations.T
                    ]
                ),
                torch.tensor([1e-3] * concentrations.shape[1]),
            )
        else:
            self.concentration_means = concentration_mean_std[0]
            self.concentration_stds = concentration_mean_std[1]

        self.concentrations = torch.divide(
            torch.subtract(
                concentrations,
                self.concentration_means,
            ),
            self.concentration_stds,
        )

    def pick_two(self, max_idx=None):
        """Draw two indices (with replacement) from ``range(max_idx or len(self))``."""
        max_idx = max_idx or len(self)
        return random.choices(range(max_idx), k=2)

    def __len__(self):
        return len(self.concentrations)

    def augment_spectra(self, spectra):
        """Return a randomly augmented copy of ``spectra`` (shape (C, L)).

        Steps: optional slope/intercept perturbation of a fitted line, a
        multiplicative cosine modulation, then a random roll. The input tensor
        is never modified, so views into ``self.spectra`` are safe to pass.

        Returns:
            NumPy array of the same shape as ``spectra``.
        """
        wns = torch.as_tensor(self.dummy_wns)  # tensor view of the position grid

        if self.augment_slope_std > 0.0:

            def spectrum_approximation(x, slope, intercept):
                return (slope * x + intercept).reshape(-1, 1)[:, 0]

            slope, inter = scipy.optimize.curve_fit(
                spectrum_approximation,
                self.dummy_wns,
                spectra.detach().cpu().numpy().reshape(-1),
                p0=np.random.rand(2),
            )[0]

            new_slope = slope * (
                    np.random.gamma(
                        shape=1. / self.augment_slope_std,
                        scale=self.augment_slope_std,
                        size=1,
                    )
            )[0]
            new_intercept = inter * (
                1.0 + np.random.randn(1) * self.augment_intercept_std
            )[0]
            # Out-of-place on purpose: an in-place add would write through to
            # self.spectra when `spectra` is a view of it.
            trend_shift = float(new_slope - slope) * wns + float(new_intercept - inter)
            spectra = spectra + trend_shift.to(spectra.dtype)

        factor = self.baseline_factor_bound * torch.rand(size=(1,))
        offset = torch.rand(size=(1,)) * 2.0 * torch.pi
        period = self.baseline_period_lower_bound + (
            self.baseline_period_upper_bound - self.baseline_period_lower_bound
        ) * torch.rand(size=(1,))
        permutations = factor * torch.cos(
            2.0 * torch.pi / period * wns + offset
        )
        return self.roll_spectrum(
            spectra + permutations * spectra,
            delta=random.randint(-self.rolling_bound, self.rolling_bound),
        )

    def roll_spectrum(self, spectra, delta):
        """Shift ``spectra`` by ``delta`` points along axis 1.

        The points that wrap around are overwritten with the nearest
        non-wrapped value times ``1 + U(0, 1) * spectrum_rolling_sigma``.

        Returns:
            A new NumPy array; ``spectra`` is left untouched.
        """
        num_spectra = spectra.shape[0]
        rolled_spectra = np.roll(spectra, delta, axis=1)
        if delta > 0:
            # Fill the first `delta` columns from the first non-wrapped column.
            rolled_spectra[:, :delta] = (
                np.random.rand(num_spectra, delta) * self.spectrum_rolling_sigma + 1
            ) * rolled_spectra[:, delta:(delta + 1)]
        elif delta < 0:
            # Fill the last `-delta` columns from the last non-wrapped column.
            rolled_spectra[:, delta:] = (
                np.random.rand(num_spectra, -delta) * self.spectrum_rolling_sigma + 1
            ) * rolled_spectra[:, delta - 1:delta]
        return rolled_spectra

    def combine_k_items(self, indices, weights):
        """Return the weighted sums of the spectra and concentrations at ``indices``."""
        return (
            # spectra
            torch.sum(
                torch.mul(weights[:, None, None], self.spectra[indices, :, :]),
                dim=0,
            ),
            # concentrations
            torch.sum(
                torch.mul(weights[:, None], self.concentrations[indices, :]),
                dim=0,
            )
        )

    def __getitem__(self, idx):
        if self.combine_spectra_range < 1e-12:
            # No mixing: augment the requested sample only.
            return (
                self.augment_spectra(self.spectra[idx]),
                self.concentrations[idx],
                torch.tensor(1.0, dtype=self.dtype),
            )

        # Mixing mode: `idx` is not used, two random samples are combined.
        if random.random() < self.original_dp_weight:
            one_weight = 1.
            label_weight = torch.tensor(1.0, dtype=self.dtype)
        else:
            one_weight = random.uniform(0.0, self.combine_spectra_range)
            label_weight = self.augmentation_weight
        weights = torch.tensor([one_weight, (1 - one_weight)], dtype=self.dtype)
        indices = self.pick_two()

        mixed_spectra, mixed_concentrations = self.combine_k_items(
            indices=indices,
            weights=weights,
        )
        mixed_spectra = self.augment_spectra(mixed_spectra)
        return mixed_spectra, mixed_concentrations, label_weight


# Hyperparameters for the model and the augmentation pipeline.
# get_dataset reads: baseline_factor_bound, baseline_period_lower_bound,
# baseline_period_span, augment_slope_std and rolling_bound.
# NOTE(review): get_dataset passes original_datapoint_weight=1. as a literal, so
# the 'original_datapoint_weight' entry below is not what the dataset uses -- confirm.
config = {
    'initial_cnn_channels': 32,
    'cnn_channel_factor': 1.279574024454846,
    'num_cnn_layers': 8,
    'kernel_size': 3,
    'stride': 2,
    'activation_function': 'ELU',
    'fc_dropout': 0.10361700399831791,
    'lr': 0.001,
    'gamma': 0.9649606352621118,
    'baseline_factor_bound': 0.748262317340447,
    'baseline_period_lower_bound': 0.9703081695287203,
    'baseline_period_span': 19.79744237606427,
    'original_datapoint_weight': 0.4335003268130408,
    'augment_slope_std': 0.08171025264382692,
    'batch_size': 32,
    'fc_dims': 226,
    'rolling_bound': 2,
    'num_blocks': 2,
}

def get_dataset(inputs, targets, config, inputs_mean_std=None, targets_mean_std=None):
    """Wrap 2-D spectra and their targets in a float32 ``SpectralDataset``.

    Args:
        inputs: Array of shape (n_samples, n_points); a channel axis is added.
        targets: Concentrations passed through unchanged.
        config: Mapping providing ``baseline_factor_bound``,
            ``baseline_period_lower_bound``, ``baseline_period_span``,
            ``augment_slope_std`` and ``rolling_bound``.
        inputs_mean_std: Optional normalisation statistics for the spectra.
        targets_mean_std: Optional normalisation statistics for the targets.

    Returns:
        The configured ``SpectralDataset``. Mixing range, intercept std, rolling
        sigma, augmentation weight and original-datapoint weight are fixed
        literals here and are not read from ``config``.
    """
    dataset_kwargs = dict(
        spectra=inputs[:, None, :],
        concentrations=targets,
        dtype=torch.float32,
        spectra_mean_std=inputs_mean_std,
        concentration_mean_std=targets_mean_std,
        combine_spectra_range=1.0,
        baseline_factor_bound=config["baseline_factor_bound"],
    )

    # The period range is stored as lower bound + span.
    period_lower = config["baseline_period_lower_bound"]
    dataset_kwargs["baseline_period_lower_bound"] = period_lower
    dataset_kwargs["baseline_period_upper_bound"] = period_lower + config["baseline_period_span"]

    dataset_kwargs.update(
        augment_slope_std=config["augment_slope_std"],
        augment_intersept_std=0.0,
        rolling_bound=config["rolling_bound"],
        spectrum_rolling_sigma=0.01,
        augmentation_weight=0.1,
        original_datapoint_weight=1.,
    )
    return SpectralDataset(**dataset_kwargs)

In [11]:
from torch.utils.data import DataLoader


def build_loader(
    SEED,
    ds,
    train=True,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    drop_last=True,
    pin_memory=True,
    persistent_workers=False,
):
    """Create a ``DataLoader`` with seeded shuffling and seeded workers.

    Args:
        SEED: Base seed for the loader's generator; evaluation loaders
            (``train=False``) use ``SEED + 5232`` instead.
        ds: Dataset to wrap.
        train: Selects which of the two generator seeds is used.
        batch_size, shuffle, num_workers, drop_last, pin_memory,
        persistent_workers: Forwarded to ``DataLoader`` unchanged.

    Returns:
        The configured ``DataLoader``.
    """
    def _seed_worker(worker_id):
        # Derive the NumPy / random seeds of a worker from its torch seed.
        derived_seed = torch.initial_seed() % 2**32
        np.random.seed(derived_seed)
        random.seed(derived_seed)

    loader_generator = torch.Generator()
    loader_generator.manual_seed(SEED if train else SEED+5232)

    loader_kwargs = {
        "batch_size": batch_size,
        "shuffle": shuffle,
        "num_workers": num_workers,
        "pin_memory": pin_memory,
        "drop_last": drop_last,
        "persistent_workers": persistent_workers,
        "worker_init_fn": _seed_worker,
        "generator": loader_generator,
    }
    return DataLoader(ds, **loader_kwargs)
    
    
def return_dls(train_ds, eval_ds, train_batch_size, eval_batch_size):
    """Build the training and evaluation loaders from the global ``SEED``.

    Both loaders run in the main process (``num_workers=0``), keep the last
    partial batch and pin memory; only the training loader shuffles.

    Returns:
        ``(train_dl, eval_dl)``.
    """
    shared_options = dict(
        num_workers=0,
        drop_last=False,
        pin_memory=True,
        persistent_workers=False,
    )

    train_dl = build_loader(
        SEED, train_ds, train=True, batch_size=train_batch_size, shuffle=True, **shared_options
    )
    eval_dl = build_loader(
        SEED, eval_ds, train=False, batch_size=eval_batch_size, shuffle=False, **shared_options
    )

    return train_dl, eval_dl

In [12]:
import neptune


def setup_neptune():
    """Start a new Neptune run, or reconnect to an existing one when ``RESUME`` is set.

    Reads the notebook globals ``RESUME``, ``MODEL_NAME``, ``SEED``, ``LR``,
    ``WD``, ``EPOCHS``, ``BATCH_SIZE``, ``DROPOUT``, ``DROP_PATH_RATE`` and, when
    resuming, ``config`` (for the run id).

    The API token is taken from the ``NEPTUNE_API_TOKEN`` environment variable;
    credentials must never be written into the notebook.

    Returns:
        The Neptune run object.
    """
    import os

    api_token = os.environ.get("NEPTUNE_API_TOKEN")

    # NOTE(review): the resume branch previously pointed at a different project
    # ("arbaaz/crunchdao-structural-break"). A run created by this function lives
    # in the project below, so both branches now use it -- confirm.
    project = "arbaaz/kaggle-spect"

    if RESUME:
        # `config` is a plain dict in this notebook; attribute access is kept
        # as a fallback for object-style configs.
        run_id = config["with_id"] if isinstance(config, dict) else config.with_id
        return neptune.init_run(
            project=project,
            with_id=run_id,
            api_token=api_token,
        )

    neptune_run = neptune.init_run(
        project=project,
        name=MODEL_NAME,
        api_token=api_token,
    )

    h_parameters = {
        "seed": SEED,
        "model_name": MODEL_NAME,
        "optimizer_name": "nadam",
        "learning_rate": LR,
        "scheduler_name": "default",
        "weight_decay": WD,
        "num_epochs": EPOCHS,
        "batch_size": BATCH_SIZE,
    }
    # Optional entries are added to the same dict so that the namespace is
    # assigned exactly once with every hyperparameter present.
    if DROPOUT: h_parameters["dropout"] = DROPOUT
    if DROP_PATH_RATE: h_parameters["drop_path_rate"] = DROP_PATH_RATE
    neptune_run["h_parameters"] = h_parameters

    return neptune_run

In [13]:
import torch.nn.functional as F
from torch.nn.modules.loss import _Loss
from sklearn.metrics import r2_score


def loss_fn(logits, targets):
    """Mean squared error between the flattened ``logits`` and ``targets``."""
    flat_logits, flat_targets = logits.view(-1), targets.view(-1)
    return F.mse_loss(flat_logits, flat_targets)


def metric_fn(logits, targets):
    """Compute R^2 for each of the three target columns and for all together.

    Args:
        logits: Tensor of predictions with at least three columns.
        targets: Tensor of ground-truth values with the same layout.

    Returns:
        ``(r2_col0, r2_col1, r2_col2, r2_all)`` where the last entry is
        ``r2_score`` evaluated on the full 2-D arrays.
    """
    preds = logits.cpu().detach().float().numpy()
    truth = targets.cpu().detach().float().numpy()

    per_column = [r2_score(truth[:, col], preds[:, col]) for col in range(3)]
    overall = r2_score(truth, preds)

    return (*per_column, overall)


class MSEIgnoreNans(_Loss):
    """Weighted mean squared error over the finite entries of the target.

    Every sample weight is applied to all target columns of that sample. If the
    resulting mean is not finite (for example when no target entry is finite)
    the loss is a constant zero.
    """

    def forward(
        self,
        input: torch.Tensor,
        target: torch.Tensor,
        weights: torch.Tensor,
    ) -> torch.Tensor:
        valid = torch.isfinite(target)

        # Broadcast the per-sample weights across the target columns.
        element_weights = torch.tile(weights[:, None], dims=(1, target.shape[1]))
        squared_error = torch.square(input[valid] - target[valid])
        mse = torch.mean(torch.mul(squared_error, element_weights[valid]))

        zero = torch.tensor(0.).to(target.device)
        return torch.where(torch.isfinite(mse), mse, zero)

In [14]:
import math


class Identity(torch.nn.Module):
    """Pass-through module: ``forward`` returns its input unchanged."""

    def forward(self, x):
        return x


# this is not a resnet yet
class ReZeroBlock(torch.nn.Module):
    """Bottleneck-style 1-D convolution block with a learnable residual gate.

    The main path is 1x1 conv -> depthwise conv (kernel ``kernel_size``,
    stride ``stride``) -> 1x1 conv, each followed by a norm layer. Its output is
    multiplied by ``factor`` (a scalar parameter initialised to 0) and added to
    the shortcut path before the final activation.

    The shortcut is a strided convolution plus norm when ``stride > 1`` and a
    norm layer alone when ``stride == 1`` (which requires
    ``in_channels == out_channels``).

    Args:
        in_channels: Channels of the input.
        out_channels: Channels of the output.
        activation_function: Activation class; instantiated with ``inplace=True``.
        kernel_size: Kernel size of the depthwise and shortcut convolutions.
        stride: Stride of the depthwise and shortcut convolutions.
        dtype: dtype of every parameter and norm buffer of the block.
        norm_layer: Norm layer class; defaults to ``torch.nn.BatchNorm1d``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        activation_function,
        kernel_size,
        stride,
        dtype,
        norm_layer=None,
    ):
        super().__init__()
        if norm_layer is None:
            norm_layer = torch.nn.BatchNorm1d

        self.kernel_size = kernel_size
        self.stride = stride
        # "Same" padding only for stride-1 blocks; strided blocks are unpadded.
        self.padding = kernel_size // 2 if stride == 1 else 0

        # does not change spatial dimension
        self.conv1 = torch.nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            bias=False,
            dtype=dtype,
        )
        self.bn1 = norm_layer(out_channels, dtype=dtype)
        # Both self.conv2 and the shortcut convolution
        # downsample the input when stride != 1
        self.conv2 = torch.nn.Conv1d(
            out_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            groups=out_channels,
            bias=False,
            dtype=dtype,
            padding=self.padding,
        )
        if stride > 1:
            down_conv = torch.nn.Conv1d(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                bias=False,
                dtype=dtype,
            )
        else:
            down_conv = Identity()

        self.down_sample = torch.nn.Sequential(
            down_conv,
            # dtype is passed here as for bn1/bn2/bn3, so that the shortcut
            # also works when the block is built with a non-default dtype.
            norm_layer(out_channels, dtype=dtype),
        )
        self.bn2 = norm_layer(out_channels, dtype=dtype)
        # does not change the spatial dimension
        self.conv3 = torch.nn.Conv1d(
            out_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            bias=False,
            dtype=dtype,
        )
        self.bn3 = norm_layer(out_channels, dtype=dtype)
        self.activation = activation_function(inplace=True)
        # Residual gate, starts at zero so the block initially equals its shortcut.
        self.factor = torch.nn.Parameter(torch.tensor(0.0, dtype=dtype))

    def next_spatial_dim(self, last_spatial_dim):
        """Return the output length for an input of length ``last_spatial_dim``."""
        return math.floor(
            (last_spatial_dim + 2 * self.padding - self.kernel_size)
            / self.stride + 1
        )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.activation(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.activation(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # not really the identity, but kind of
        identity = self.down_sample(x)

        return self.activation(out * self.factor + identity)


class ResNetEncoder(torch.nn.Module):
    """Stack of ``ReZeroBlock`` stages applied in sequence.

    For every consecutive pair in ``cnn_encoder_channel_dims`` one block with
    the given ``stride`` changes the channel count, followed by
    ``num_blocks - 1`` stride-1 blocks that keep it. ``spatial_dims`` records
    the sequence length after every block, starting with ``spectrum_size``.

    Args:
        spectrum_size: Length of the input along the last axis.
        cnn_encoder_channel_dims: Channel counts, input channels first.
        activation_function: Activation class handed to every block.
        kernel_size: Kernel size handed to every block.
        stride: Stride of the first block of each stage.
        dtype: dtype handed to every block.
        num_blocks: Number of blocks per stage.
        verbose: Print the channel and spatial dimensions.
    """

    def __init__(
        self,
        spectrum_size,
        cnn_encoder_channel_dims,
        activation_function,
        kernel_size,
        stride,
        dtype,
        num_blocks,
        verbose=False,
    ):
        super().__init__()

        self.spatial_dims = [spectrum_size]
        blocks = []

        def add_block(n_in, n_out, block_stride):
            # Create one block and track the length it produces.
            new_block = ReZeroBlock(
                in_channels=n_in,
                out_channels=n_out,
                activation_function=activation_function,
                kernel_size=kernel_size,
                stride=block_stride,
                dtype=dtype,
            )
            blocks.append(new_block)
            self.spatial_dims.append(new_block.next_spatial_dim(self.spatial_dims[-1]))

        stage_channels = zip(cnn_encoder_channel_dims[:-1], cnn_encoder_channel_dims[1:])
        for in_channels, out_channels in stage_channels:
            add_block(in_channels, out_channels, stride)
            for _ in range(num_blocks - 1):
                add_block(out_channels, out_channels, 1)

        self.resnet_layers = torch.nn.Sequential(*blocks)
        if verbose:
            print("CNN Encoder Channel Dims: %s" % (cnn_encoder_channel_dims))
            print("CNN Encoder Spatial Dims: %s" % (self.spatial_dims))

    def forward(self, x):
        return self.resnet_layers(x)


class ReZeroNet(torch.nn.Module):
    """CNN encoder (``ResNetEncoder``) followed by a fully connected head.

    Args:
        spectra_channels: Number of input channels.
        spectra_size: Length of the input spectra.
        initial_cnn_channels: Channels of the first encoder stage.
        cnn_channel_factor: Growth factor of the channel count per stage
            (stage ``i`` has ``int(initial * factor**i)`` channels).
        num_cnn_layers: Number of encoder stages.
        kernel_size: Kernel size of the encoder blocks.
        stride: Stride of the first block of each stage.
        activation_function: Name of an activation class in ``torch.nn``.
        fc_dims: Sequence of layer widths of the head; the last entry is the
            output size per output channel.
        fc_dropout: Dropout after the first hidden layer, halved for every
            further hidden layer.
        dtype: dtype of all parameters; defaults to ``torch.float32``.
        verbose: Print layer dimensions and the parameter count.
        fc_output_channels: Number of output channels; when greater than 1 the
            output is reshaped to ``(batch, fc_output_channels, fc_dims[-1])``.
        num_blocks: Blocks per encoder stage.
        **kwargs: Accepted and ignored.
    """

    def __init__(
        self,
        spectra_channels,
        spectra_size,
        initial_cnn_channels,
        cnn_channel_factor,
        num_cnn_layers,
        kernel_size,
        stride,
        activation_function,
        fc_dims,
        fc_dropout=0.0,
        dtype=None,
        verbose=False,
        fc_output_channels=1,
        num_blocks=1,
        **kwargs,
    ):
        super().__init__()
        self.fc_output_channels = fc_output_channels
        # Resolved once and used for BOTH the encoder and the head, so that a
        # non-default dtype yields a model that is consistent end to end.
        self.dtype = dtype or torch.float32

        activation_function = getattr(torch.nn, activation_function)

        # Setup CNN Encoder
        cnn_encoder_channel_dims = [spectra_channels] + [
            int(initial_cnn_channels * (cnn_channel_factor**idx))
            for idx in range(num_cnn_layers)
        ]
        self.cnn_encoder = ResNetEncoder(
            spectrum_size=spectra_size,
            cnn_encoder_channel_dims=cnn_encoder_channel_dims,
            activation_function=activation_function,
            kernel_size=kernel_size,
            stride=stride,
            num_blocks=num_blocks,
            dtype=self.dtype,
            verbose=verbose,
        )
        # Flattened encoder output size, followed by the requested head widths
        # (list() lets callers pass any sequence, e.g. a tuple).
        self.fc_dims = [
            int(
                self.cnn_encoder.spatial_dims[-1]
            ) * int(cnn_encoder_channel_dims[-1])
        ] + list(fc_dims)

        if verbose:
            print("Fc Dims: %s" % self.fc_dims)
        fc_layers = []
        for idx, (in_dim, out_dim) in enumerate(
                zip(self.fc_dims[:-2], self.fc_dims[1:-1])
        ):
            fc_layers.append(torch.nn.Linear(in_dim, out_dim, dtype=self.dtype))
            fc_layers.append(torch.nn.ELU())
            fc_layers.append(torch.nn.Dropout(fc_dropout / (2 ** idx)))
        fc_layers.append(
            torch.nn.Linear(
                self.fc_dims[-2],
                self.fc_dims[-1] * self.fc_output_channels,
                dtype=self.dtype,
            ),
        )
        self.fc_net = torch.nn.Sequential(*fc_layers)
        if verbose:
            num_params = sum(p.numel() for p in self.parameters())
            print("Number of Parameters: %s" % num_params)

    def forward(self, spectra):
        embeddings = self.cnn_encoder(spectra)
        # Flatten (channels, length) into one feature axis for the head.
        forecast = self.fc_net(embeddings.view(-1, self.fc_dims[0]))
        if self.fc_output_channels > 1:
            forecast = forecast.reshape(
                -1, self.fc_output_channels, self.fc_dims[-1]
            )
        return forecast


In [16]:
import math
import torch


class ResZeroBlock(torch.nn.Module):
    """Residual wrapper computing ``skip_part(X) + factor * model_part(X)``.

    ``factor`` is a learnable scalar initialised to zero.
    """

    def __init__(self, skip_part, model_part):
        super().__init__()
        self.skip_part = skip_part
        self.model_part = model_part
        self.factor = torch.nn.parameter.Parameter(torch.tensor(0.))

    def forward(self, X):
        shortcut = self.skip_part(X)
        residual = self.model_part(X)
        return shortcut + self.factor * residual


class Identity(torch.nn.Module):
    """Pass-through module: ``forward`` returns its input unchanged."""

    # NOTE: a class of the same name is defined in an earlier cell; running
    # this cell rebinds the name `Identity` to this definition.
    def forward(self, X):
        return X


class RamanXception(torch.nn.Module):
    """1-D Xception-style convolutional network with a fully connected head.

    The convolutional trunk (``self.conv_net``) is a ``Sequential`` of five
    stages: initial strided convolutions, an entry flow and an exit flow of
    down-sampling ``ResZeroBlock``s, a middle flow of shape-preserving
    ``ResZeroBlock``s, and a final (non-residual) convolution stack. The
    flattened trunk output is passed through ``self.fc_net``.

    Module names and their order inside every ``Sequential`` determine the
    ``state_dict`` keys, so they are kept stable for checkpoint loading.

    Args:
        spectra_size: length of the input spectrum (last input dimension).
        initial_channels: output channels of the initial strided convs; the
            first conv takes a single input channel.
        entry_channels: output channels of each entry-flow block.
        num_mid_blocks: number of middle-flow blocks.
        exit_channels: sequence of ``(mid_channels, out_channels)`` pairs; all
            but the last pair build exit-flow blocks, the last pair builds the
            final convolution stack. At least two pairs are required.
        num_concentrations: number of network outputs.
        fc_dims: hidden widths of the fully connected head (may be empty).
        fc_dropout: dropout probability applied after each hidden FC layer.
        lower_bounds: optional tensor stored as a frozen parameter; defaults
            to ``-1000`` per output. It is not used by ``forward``.
        dtype: dtype of conv / batch-norm / linear layers (default float32).
        activation_function: name of a ``torch.nn`` activation class.
        classification_idx: outputs from this index on are passed through a
            sigmoid; ``None`` means ``num_concentrations`` (no sigmoid at all).
        verbose: print the computed spatial and FC dimensions.
        **kwargs: accepted and ignored, so a larger config dict can be
            unpacked into the constructor.
    """

    def __init__(
        self,
        spectra_size,
        initial_channels,
        entry_channels,
        num_mid_blocks,
        exit_channels,
        num_concentrations,
        fc_dims,
        fc_dropout,
        lower_bounds=None,
        dtype=None,
        activation_function='ReLU',
        classification_idx=None,
        verbose=False,
        **kwargs,
    ):
        super(RamanXception, self).__init__()

        # Explicit None check: `classification_idx or ...` would silently turn
        # a legitimate index of 0 (sigmoid on every output) into "no sigmoid".
        self.classification_idx = (
            num_concentrations
            if classification_idx is None
            else classification_idx
        )

        if lower_bounds is None:
            lower_bounds = torch.tensor([-1000] * num_concentrations)
        self.lower_bounds = torch.nn.parameter.Parameter(
            lower_bounds,
            requires_grad=False,
        )

        dtype = dtype or torch.float32
        activation_function = getattr(torch.nn, activation_function)
        # Accept any sequence (list or tuple) for the channel specifications.
        initial_channels = list(initial_channels)
        entry_channels = list(entry_channels)
        exit_channels = list(exit_channels)
        fc_dims = list(fc_dims)
        self.spatial_dimensions = [spectra_size]

        # Initial layers: strided conv -> batch norm -> activation.
        initial_layers = torch.nn.Sequential()
        for idx, (in_channels, out_channels) in enumerate(
            zip([1] + initial_channels[:-1], initial_channels)
        ):
            initial_layers.add_module(
                'initial_%s' % idx,
                torch.nn.Conv1d(
                    in_channels,
                    out_channels,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    dtype=dtype,
                    bias=False,
                ),
            )
            self.spatial_dimensions.append(
                self._halved(self.spatial_dimensions[-1])
            )
            initial_layers.add_module(
                'initial_batch_%s' % idx,
                torch.nn.BatchNorm1d(out_channels, dtype=dtype),
            )
            initial_layers.add_module(
                'initial_activation_%s' % idx,
                activation_function(),
            )

        # Entry flow: each block halves the spatial length.
        entry_flow = torch.nn.Sequential()
        for idx, (in_channels, out_channels) in enumerate(
            zip([initial_channels[-1]] + entry_channels[:-1], entry_channels)
        ):
            entry_flow.add_module(
                name='entry_flow_%s' % idx,
                module=ResZeroBlock(
                    skip_part=self._strided_projection(
                        in_channels, out_channels, dtype
                    ),
                    model_part=torch.nn.Sequential(
                        activation_function(),
                        # spatial dimension stays constant
                        *self._separable_conv(in_channels, in_channels, dtype),
                        torch.nn.BatchNorm1d(in_channels, dtype=dtype),
                        activation_function(),
                        *self._separable_conv(in_channels, out_channels, dtype),
                        torch.nn.BatchNorm1d(out_channels, dtype=dtype),
                        # spatial dimension: in_dim / 2
                        torch.nn.MaxPool1d(3, stride=2, padding=1),
                    ),
                ),
            )
            self.spatial_dimensions.append(
                self._halved(self.spatial_dimensions[-1])
            )

        # Middle flow: identity skip, channel count and length unchanged.
        num_mid_channels = entry_channels[-1]
        middle_flow = torch.nn.Sequential()
        for idx in range(num_mid_blocks):
            middle_flow.add_module(
                name='middle_flow_%s' % idx,
                module=ResZeroBlock(
                    skip_part=Identity(),
                    model_part=torch.nn.Sequential(
                        activation_function(),
                        *self._separable_conv(
                            num_mid_channels, num_mid_channels, dtype
                        ),
                        torch.nn.BatchNorm1d(num_mid_channels, dtype=dtype),
                        activation_function(),
                        *self._separable_conv(
                            num_mid_channels, num_mid_channels, dtype
                        ),
                        torch.nn.BatchNorm1d(num_mid_channels, dtype=dtype),
                        activation_function(),
                        # The third pair is not followed by a batch norm.
                        *self._separable_conv(
                            num_mid_channels, num_mid_channels, dtype
                        ),
                    ),
                ),
            )
            self.spatial_dimensions.append(self.spatial_dimensions[-1])

        # Exit flow: every pair except the last one builds a halving block.
        exit_flow = torch.nn.Sequential()
        exit_inputs = [num_mid_channels] + [
            out for _, out in exit_channels[:-2]
        ]
        for idx, (in_channels, (mid_channels, out_channels)) in enumerate(
            zip(exit_inputs, exit_channels[:-1])
        ):
            exit_flow.add_module(
                name='exit_flow_%s' % idx,
                module=ResZeroBlock(
                    skip_part=self._strided_projection(
                        in_channels, out_channels, dtype
                    ),
                    model_part=torch.nn.Sequential(
                        *self._separable_conv(in_channels, mid_channels, dtype),
                        torch.nn.BatchNorm1d(mid_channels, dtype=dtype),
                        activation_function(),
                        *self._separable_conv(mid_channels, out_channels, dtype),
                        torch.nn.BatchNorm1d(out_channels, dtype=dtype),
                        activation_function(),
                        torch.nn.MaxPool1d(
                            kernel_size=3,
                            stride=2,
                            padding=1,
                        ),
                    ),
                ),
            )
            self.spatial_dimensions.append(
                self._halved(self.spatial_dimensions[-1])
            )

        # Last part of the exit flow (no skip connection, no down-sampling).
        in_channels = exit_channels[-2][1]
        mid_channels = exit_channels[-1][0]
        out_channels = exit_channels[-1][1]
        final_flow = torch.nn.Sequential(
            *self._separable_conv(in_channels, mid_channels, dtype),
            torch.nn.BatchNorm1d(mid_channels, dtype=dtype),
            activation_function(),
            # Unlike every other conv in the trunk, this last pair keeps its
            # bias terms; they are part of the state_dict, so this is kept.
            *self._separable_conv(mid_channels, out_channels, dtype, bias=True),
            torch.nn.BatchNorm1d(out_channels, dtype=dtype),
            activation_function(),
        )
        self.conv_net = torch.nn.Sequential(
            initial_layers,
            entry_flow,
            middle_flow,
            exit_flow,
            final_flow,
        )

        # Fully connected head on the flattened trunk output.
        self.fc_input_dim = int(out_channels * self.spatial_dimensions[-1])
        self.fc_net = torch.nn.Sequential()
        for idx, (in_dim, out_dim) in enumerate(
            zip([self.fc_input_dim] + fc_dims[:-1], fc_dims)
        ):
            self.fc_net.add_module(
                'fc_net_%s' % idx,
                torch.nn.Linear(in_dim, out_dim, dtype=dtype, bias=True),
            )
            self.fc_net.add_module('fc_relu_%s' % idx, torch.nn.ReLU())
            self.fc_net.add_module(
                'fc_dropout_%s' % idx,
                torch.nn.Dropout(fc_dropout),
            )

        # With no hidden layers the output layer consumes the flattened trunk
        # output directly, i.e. fc_input_dim features (not just out_channels).
        head_in_dim = fc_dims[-1] if fc_dims else self.fc_input_dim
        self.fc_net.add_module(
            'output_layer',
            torch.nn.Linear(
                head_in_dim,
                num_concentrations,
                dtype=dtype,
                bias=True,
            ),
        )
        # Kept as an attribute for compatibility; forward() does not call it.
        self.softplus = torch.nn.Softplus()
        if verbose:
            print('Spatial dimensions: %s' % self.spatial_dimensions)
            print(
                'Fully Connected dimensions %s' % (
                        [self.fc_input_dim] + fc_dims
                )
            )

    @staticmethod
    def _halved(length):
        """Output length of a kernel-3/stride-2/padding-1 (or kernel-1/stride-2) layer."""
        return math.floor((length - 1) / 2 + 1)

    @staticmethod
    def _strided_projection(in_channels, out_channels, dtype):
        """1x1 stride-2 convolution used as the skip path of halving blocks."""
        return torch.nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=2,
            dtype=dtype,
            bias=False,
        )

    @staticmethod
    def _separable_conv(in_channels, out_channels, dtype, bias=False):
        """Return ``[pointwise 1x1 conv, depthwise kernel-3 conv]`` in that order."""
        return [
            torch.nn.Conv1d(
                in_channels,
                out_channels,
                kernel_size=1,
                dtype=dtype,
                bias=bias,
            ),
            torch.nn.Conv1d(
                out_channels,
                out_channels,
                kernel_size=3,
                groups=out_channels,
                padding=1,
                dtype=dtype,
                bias=bias,
            ),
        ]

    def forward(self, x):
        """Return ``(batch, num_concentrations)`` predictions.

        Columns before ``classification_idx`` are raw linear outputs; columns
        from ``classification_idx`` on are passed through a sigmoid.
        """
        x = self.conv_net(x)

        fc_output = self.fc_net(torch.reshape(x, (-1, self.fc_input_dim)))
        return torch.concat(
            [
                fc_output[:, :self.classification_idx],
                torch.sigmoid(fc_output[:, self.classification_idx:])
            ],
            dim=1,
        )


# Hyper-parameters for RamanXception. The dict is later unpacked into the
# constructor (`RamanXception(**model_config, ...)`); keys that are not named
# constructor parameters (e.g. 'learning_rate', 'gamma', 'batch_size') are
# absorbed by its `**kwargs`.
model_config = {
    'initial_channels': 8,
    'entry_channels_start': 17,
    'channel_factor': 1.5692504144354933,
    'entry_exit_length': 3,
    'num_mid_blocks': 4,
    'fc_dims': 101,
    'fc_dropout': 0.11748964300948816,
    'learning_rate': 0.001,
    'gamma': 0.9921697445978254,
    'batch_size': 21,
    'entropy_weight': 6.441421425536572,
    'uniform_sampling_range': 0.03803705551872033,
    'activation_function': 'ELU', 
    'fake_weight': 0.032878013410751736,
    'just_scale_concentrations': True,
    'entry_factor': 1.5692504144354933,
    'exit_factor': 1.5692504144354933,
    'entry_length': 3,
    'exit_length': 3,
    'spectra_size': 1643,
    'dtype': torch.float32}

# Module-level copies of a few settings ('l2_reg' is absent above, so 0. is used).
lr = model_config.get('learning_rate')
l2_reg = model_config.get('l2_reg', 0.)
gamma = model_config.get('gamma', 1.)
# Expand the scalar into the two initial conv widths: [c, 2 * c].
model_config['initial_channels'] = [
    model_config['initial_channels'],
    2 * model_config['initial_channels'],
]
# create entry channel dimensions
# Geometric growth, truncated to int at every step; the resulting list has
# entry_length + 1 entries (the start value plus one per iteration).
entry_channels_start = model_config['entry_channels_start']
entry_factor = model_config['entry_factor']
entry_length = model_config['entry_length']
entry_channels = [entry_channels_start]
for _ in range(entry_length):
    entry_channels.append(int(entry_factor * entry_channels[-1]))
model_config['entry_channels'] = entry_channels
# create exit channel dimensions
# Each entry is a (mid_channels, out_channels) pair. The first pair scales the
# last entry width by sqrt(exit_factor) and exit_factor respectively.
exit_channels_start = entry_channels[-1]
exit_factor = model_config.get('exit_factor')
exit_length = model_config.get('exit_length')
exit_channels = [
    (
        int(exit_channels_start * math.sqrt(exit_factor)),
        int(exit_channels_start * exit_factor),
    )
]
# NOTE(review): both members of every later pair are derived from the previous
# pair's *first* element ([-1][0]), not from its output width ([-1][1]).
# Changing this would change layer shapes, so confirm against the checkpoint.
for _ in range(1, exit_length):
    exit_channels.append(
        (
            int(exit_channels[-1][0] * math.sqrt(exit_factor)),
            int(exit_channels[-1][0] * exit_factor),
        )
    )
model_config['exit_channels'] = exit_channels
# NOTE(review): the key is spelled "num_concnetrations". RamanXception has no
# parameter of that name, so it ends up in **kwargs. The k-fold cell passes
# num_concentrations=3 explicitly alongside **model_config, so merely fixing
# the spelling here would make that call pass the argument twice.
model_config["num_concnetrations"] = 3
# NOTE(review): this reads `config` (a different dict, defined outside this
# cell) rather than `model_config`. The resulting FC width must match the
# pretrained checkpoint, so confirm which value is intended before changing it.
model_config['fc_dims'] = [config['fc_dims']]

In [17]:
from tqdm.auto import tqdm


def train(
    model, 
    optimizer,
    device,
    amp_dtype,
    scheduler,
    train_dl,
    eval_dl,
    loss_fn,
    epochs,
    checkpoint_name,
    score=-float("inf"),
    neptune_run=None,
    p=True,
    output_dir="/kaggle/working",
):  
    """Train ``model`` with mixed precision and checkpoint the best eval R2.

    Each epoch runs one pass over ``train_dl`` (stepping ``scheduler`` after
    every batch) and one pass over ``eval_dl``. Metrics come from the
    notebook-level ``metric_fn(logits, targets)``, which must return four
    values; the fourth is treated as the R2 used for model selection.

    Args:
        model: module to train; called as ``model(inputs)``.
        optimizer: optimizer over the model parameters.
        device: device string, used for tensor placement, autocast and the
            gradient scaler.
        amp_dtype: autocast dtype. With ``torch.bfloat16`` a plain
            backward/step is used; any other dtype goes through ``GradScaler``.
        scheduler: learning-rate scheduler, stepped once per training batch.
        train_dl, eval_dl: iterables of ``(inputs, targets, weights)`` batches.
            ``weights`` is not used by this function.
        loss_fn: callable ``loss_fn(logits, targets)`` returning a scalar loss.
        epochs: number of epochs.
        checkpoint_name: file name of the checkpoint inside ``output_dir``.
        score: best score so far; a checkpoint is written only when the eval
            R2 exceeds it.
        neptune_run: optional run object to log to; it is always stopped
            before returning, including when training raises.
        p: print a summary line every fifth epoch.
        output_dir: directory the checkpoint is written to.

    Returns:
        The best eval R2 seen (or the initial ``score`` if never exceeded).
    """
    import os

    def _for_metrics(tensor):
        # Half-precision outputs are upcast so that metrics are computed in
        # float32 (and so that bfloat16 tensors can be converted to NumPy).
        tensor = tensor.detach()
        if tensor.dtype in (torch.float16, torch.bfloat16):
            tensor = tensor.float()
        return tensor.cpu()

    checkpoint_path = os.path.join(output_dir, checkpoint_name)

    try:
        scaler = torch.amp.GradScaler(device)
        for epoch in tqdm(range(epochs)):
            model.train()
            total_loss = 0.0
            all_logits = []
            all_targets = []

            # The per-sample weights are part of each batch but are unused.
            for inputs, targets, _weights in train_dl:
                inputs = inputs.to(device, non_blocking=True)
                targets = targets.to(device, non_blocking=True)

                optimizer.zero_grad()
                with torch.amp.autocast(device_type=device, dtype=amp_dtype, cache_enabled=True):
                    logits = model(inputs)
                    loss = loss_fn(logits, targets)

                if amp_dtype == torch.bfloat16:
                    loss.backward()
                    optimizer.step()
                else:
                    scaler.scale(loss).backward()
                    scaler.step(optimizer)
                    scaler.update()

                scheduler.step()
                if neptune_run is not None:
                    neptune_run["lr_step"].append(scheduler.get_last_lr()[0])

                # Accumulate as a Python float so logged values are plain numbers.
                total_loss += loss.item()
                all_logits.append(_for_metrics(logits))
                all_targets.append(targets.detach().cpu())

            all_logits = torch.cat(all_logits)
            all_targets = torch.cat(all_targets)

            one, two, three, r2 = metric_fn(all_logits, all_targets)
            total_loss = total_loss / len(train_dl)

            model.eval()
            eval_total_loss = 0.0
            eval_all_logits = []
            eval_all_targets = []

            for inputs, targets, _weights in eval_dl:
                inputs = inputs.to(device, non_blocking=True)
                targets = targets.to(device, non_blocking=True)

                with torch.inference_mode():
                    with torch.amp.autocast(device_type=device, dtype=amp_dtype, cache_enabled=True):
                        logits = model(inputs)
                        loss = loss_fn(logits, targets)

                eval_total_loss += loss.item()
                eval_all_logits.append(_for_metrics(logits))
                eval_all_targets.append(targets.detach().cpu())

            eval_all_logits = torch.cat(eval_all_logits)
            eval_all_targets = torch.cat(eval_all_targets)

            eval_one, eval_two, eval_three, eval_r2 = metric_fn(eval_all_logits, eval_all_targets)
            eval_total_loss = eval_total_loss / len(eval_dl)

            # Keep only the best-scoring weights on disk.
            if eval_r2 > score:
                score = eval_r2
                data = {"state_dict": model.state_dict()}
                data["epoch"] = epoch
                data["score"] = score
                torch.save(data, checkpoint_path)

            if neptune_run is not None:
                neptune_run["train/loss"].append(total_loss)
                neptune_run["eval/loss"].append(eval_total_loss)
                neptune_run["train/r2"].append(r2)
                neptune_run["eval/r2"].append(eval_r2)
                neptune_run["train/one"].append(one)
                neptune_run["train/two"].append(two)
                neptune_run["train/three"].append(three)
                neptune_run["eval/one"].append(eval_one)
                neptune_run["eval/two"].append(eval_two)
                neptune_run["eval/three"].append(eval_three)

            if p and epoch % 5 == 0:
                print(
                    f"Epoch: {epoch}, "
                    f"train/loss: {total_loss:.4f}, "
                    f"eval/loss: {eval_total_loss:.4f}, "
                    f"train/r2: {r2:.4f}, "
                    f"eval/r2: {eval_r2:.4f}, "
                    f"train/one: {one:.4f}, "
                    f"train/two: {two:.4f}, "
                    f"train/three: {three:.4f}, "
                    f"eval/one: {eval_one:.4f}, "
                    f"eval/two: {eval_two:.4f}, "
                    f"eval/three: {eval_three:.4f} "
                )
    finally:
        # Close the run even if an epoch fails, so it is not left dangling.
        if neptune_run is not None:
            neptune_run.stop()
    return score

In [18]:
# Optimisation settings for the fine-tuning run.
EPOCHS = 100
LR = 1e-4
WD = 1e-3

# Regularisation settings.
DROPOUT = 0.5
DROP_PATH_RATE = None

RESUME = False

# Run on the GPU when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Disabled: preparation of `config` (three-entry `fc_dims`, spectra settings).
# The branch never executes while the condition is the literal False.
if False:
    config["dtype"] = torch.float32
    config["spectra_size"] = 1643
    config["spectra_channels"] = 1
    config["fc_dims"] = [
        config["fc_dims"],
        int(config["fc_dims"] / 2),
        3,
    ]

    #mse_loss_function = MSEIgnoreNans()

In [None]:
from sklearn.model_selection import KFold


# 5-fold cross-validated fine-tuning of RamanXception.
# Per fold: build datasets, load the pretrained weights, train, and record the
# best eval score together with the normalisation statistics of the train fold.
inputs_mean_std = []
targets_mean_std = []
scores = []
kfold = KFold(n_splits=5, shuffle=True, random_state=SEED)
splits = kfold.split(inputs)

for fold, (train_idx, eval_idx) in enumerate(splits):
    # MODEL_NAME is assigned here but not read anywhere else in this cell.
    MODEL_NAME = f"finetune.xception.F{fold}"
    checkpoint_name = f"finetune.xception.F{fold}.pt"
    
    train_inputs = inputs[train_idx]
    train_targets = targets[train_idx]
    eval_inputs = inputs[eval_idx]
    eval_targets = targets[eval_idx]

    train_ds = get_dataset(train_inputs, train_targets, config)
    
    # Keep the per-fold statistics of the training split for later use.
    inputs_mean_std.append((fold, train_ds.s_mean, train_ds.s_std))
    targets_mean_std.append((fold, train_ds.concentration_means, train_ds.concentration_stds))
    
    # The eval split is built with the statistics of the training split.
    eval_ds = get_dataset(
        eval_inputs, 
        eval_targets, 
        config,
        (train_ds.s_mean, train_ds.s_std), 
        (train_ds.concentration_means, train_ds.concentration_stds)
    )
    
    BATCH_SIZE = 32
    # presumably the 4th argument is the eval batch size (whole eval split in
    # one batch) — TODO confirm against return_dls.
    train_dl, eval_dl = return_dls(train_ds, eval_ds, BATCH_SIZE, len(eval_ds))
    
    #model = convnextv2_atto().to(device)
    #model = ReZeroNet(**config).to(device)
    #model = ResNet(dropout=DROPOUT).to(device)
    # model_config also carries keys that are not constructor parameters;
    # RamanXception accepts them through **kwargs.
    model = RamanXception(
        **model_config,
        classification_idx=3,
        num_concentrations=3
    ).to(device)
    
    # Every fold starts from the same pretrained weights; the checkpoint file
    # is read again on each iteration.
    ckpt = get_ckpt("/kaggle/input/baseline/pytorch/default/1/xception.pretrain.0.pt")["state_dict"]
    model.load_state_dict(ckpt)
    
    # NOTE(review): the recorded output shows a number followed by "None",
    # which suggests get_model_size prints the size itself and returns None.
    if fold == 0: print(get_model_size(model))
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD, foreach=True)
    scheduler = get_scheduler(optimizer, train_dl, EPOCHS)
    
    # torch.float16 is not bfloat16, so train() takes its GradScaler branch.
    score = train(
            model, 
            optimizer, 
            device,
            torch.float16,
            scheduler,
            train_dl, 
            eval_dl,
            loss_fn,
            EPOCHS,
            checkpoint_name,
            neptune_run=setup_neptune(),
        )
    
    scores.append(score)

0.387266
None




[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/arbaaz/kaggle-spect/e/KAG-312


  0%|          | 0/100 [00:00<?, ?it/s]



Epoch: 0, train/loss: 2.3302, eval/loss: 3.0766, train/r2: -1.5368, eval/r2: -2.6660, train/one: -3.6786, train/two: -0.5999, train/three: -0.3318, eval/one: -3.9156, eval/two: -2.2408, eval/three: -1.8416 
Epoch: 5, train/loss: 0.9565, eval/loss: 1.1033, train/r2: -0.0513, eval/r2: -0.1970, train/one: -0.1301, train/two: 0.0091, train/three: -0.0328, eval/one: 0.0352, eval/two: -0.5632, eval/three: -0.0631 
Epoch: 10, train/loss: 0.9050, eval/loss: 0.8658, train/r2: 0.0249, eval/r2: 0.0124, train/one: 0.1203, train/two: -0.0441, train/three: -0.0016, eval/one: 0.1114, eval/two: -0.0580, eval/three: -0.0164 
Epoch: 15, train/loss: 0.7501, eval/loss: 0.8563, train/r2: 0.2130, eval/r2: 0.1111, train/one: 0.3027, train/two: 0.2005, train/three: 0.1357, eval/one: 0.2962, eval/two: -0.0632, eval/three: 0.1003 
Epoch: 20, train/loss: 0.6047, eval/loss: 0.7669, train/r2: 0.3373, eval/r2: 0.0356, train/one: 0.4473, train/two: 0.1775, train/three: 0.3871, eval/one: 0.4909, eval/two: -0.0280, ev

  0%|          | 0/100 [00:00<?, ?it/s]



Epoch: 0, train/loss: 2.7953, eval/loss: 2.7959, train/r2: -1.5585, eval/r2: -2.0534, train/one: -3.8298, train/two: -0.3950, train/three: -0.4507, eval/one: -2.9824, eval/two: -2.4701, eval/three: -0.7079 
Epoch: 5, train/loss: 1.1114, eval/loss: 0.9856, train/r2: -0.1442, eval/r2: -0.2581, train/one: -0.3130, train/two: -0.0959, train/three: -0.0236, eval/one: -0.4660, eval/two: -0.1328, eval/three: -0.1754 
Epoch: 10, train/loss: 0.8912, eval/loss: 0.8087, train/r2: 0.1094, eval/r2: -0.0637, train/one: 0.1254, train/two: 0.0769, train/three: 0.1260, eval/one: 0.2060, eval/two: 0.0541, eval/three: -0.4513 
Epoch: 15, train/loss: 0.8787, eval/loss: 0.6486, train/r2: 0.1466, eval/r2: 0.2044, train/one: 0.1576, train/two: 0.0981, train/three: 0.1840, eval/one: 0.2417, eval/two: 0.1358, eval/three: 0.2358 
Epoch: 20, train/loss: 0.6800, eval/loss: 0.8336, train/r2: 0.2844, eval/r2: 0.2100, train/one: 0.3685, train/two: 0.1087, train/three: 0.3758, eval/one: 0.3386, eval/two: -0.0213, eva

  0%|          | 0/100 [00:00<?, ?it/s]



Epoch: 0, train/loss: 2.6998, eval/loss: 3.0836, train/r2: -1.9631, eval/r2: -1.3929, train/one: -4.6018, train/two: -1.1673, train/three: -0.1203, eval/one: -2.6100, eval/two: -1.4591, eval/three: -0.1097 
Epoch: 5, train/loss: 1.1375, eval/loss: 0.9349, train/r2: -0.2207, eval/r2: -0.2453, train/one: -0.4339, train/two: -0.0646, train/three: -0.1637, eval/one: -0.0839, eval/two: -0.3719, eval/three: -0.2801 
Epoch: 10, train/loss: 0.9031, eval/loss: 1.0995, train/r2: 0.0290, eval/r2: -0.0243, train/one: 0.0721, train/two: 0.0580, train/three: -0.0430, eval/one: 0.1146, eval/two: -0.2238, eval/three: 0.0362 
Epoch: 15, train/loss: 0.8068, eval/loss: 0.8233, train/r2: 0.1633, eval/r2: 0.0065, train/one: 0.1994, train/two: 0.1389, train/three: 0.1518, eval/one: 0.1944, eval/two: -0.3007, eval/three: 0.1259 
Epoch: 20, train/loss: 0.7601, eval/loss: 0.8574, train/r2: 0.2808, eval/r2: 0.1075, train/one: 0.3294, train/two: 0.2225, train/three: 0.2904, eval/one: 0.2692, eval/two: -0.0928, e

  0%|          | 0/100 [00:00<?, ?it/s]



Epoch: 0, train/loss: 2.3995, eval/loss: 2.3529, train/r2: -1.5230, eval/r2: -0.7191, train/one: -3.4630, train/two: -0.8923, train/three: -0.2138, eval/one: -1.5425, eval/two: -0.3646, eval/three: -0.2501 
Epoch: 5, train/loss: 1.1211, eval/loss: 1.9796, train/r2: -0.2743, eval/r2: -1.3688, train/one: -0.6269, train/two: -0.0657, train/three: -0.1303, eval/one: -0.6926, eval/two: -1.0884, eval/three: -2.3255 
Epoch: 10, train/loss: 1.0025, eval/loss: 1.2340, train/r2: 0.0570, eval/r2: -0.2364, train/one: 0.0836, train/two: 0.0463, train/three: 0.0410, eval/one: -0.9048, eval/two: 0.1027, eval/three: 0.0929 
Epoch: 15, train/loss: 0.8716, eval/loss: 1.1473, train/r2: 0.1259, eval/r2: 0.1104, train/one: 0.2102, train/two: 0.0296, train/three: 0.1380, eval/one: 0.2359, eval/two: 0.1083, eval/three: -0.0132 
Epoch: 20, train/loss: 0.6749, eval/loss: 0.8065, train/r2: 0.2634, eval/r2: 0.1645, train/one: 0.3405, train/two: 0.1561, train/three: 0.2935, eval/one: 0.0286, eval/two: 0.1072, eva

  0%|          | 0/100 [00:00<?, ?it/s]



Epoch: 0, train/loss: 2.6479, eval/loss: 3.1371, train/r2: -1.3944, eval/r2: -2.3154, train/one: -3.3117, train/two: -0.7180, train/three: -0.1534, eval/one: -4.3701, eval/two: -1.6845, eval/three: -0.8918 
Epoch: 5, train/loss: 1.1597, eval/loss: 0.9949, train/r2: -0.1414, eval/r2: -0.3167, train/one: -0.1774, train/two: -0.1409, train/three: -0.1060, eval/one: -0.1051, eval/two: -0.3928, eval/three: -0.4523 
Epoch: 10, train/loss: 0.9023, eval/loss: 0.8721, train/r2: 0.0859, eval/r2: -0.0613, train/one: 0.1111, train/two: 0.0578, train/three: 0.0889, eval/one: 0.0890, eval/two: -0.0793, eval/three: -0.1935 
Epoch: 15, train/loss: 0.8085, eval/loss: 0.6531, train/r2: 0.2453, eval/r2: 0.0923, train/one: 0.3152, train/two: 0.1797, train/three: 0.2410, eval/one: 0.1626, eval/two: -0.1136, eval/three: 0.2279 
Epoch: 20, train/loss: 0.6574, eval/loss: 0.6764, train/r2: 0.3208, eval/r2: 0.1185, train/one: 0.4356, train/two: 0.1012, train/three: 0.4255, eval/one: 0.2892, eval/two: -0.4331, e

In [20]:
class SpectralTestDataset(Dataset):
    """Inference-time dataset that yields normalised spectra only.

    The spectra are standardised with a single global mean / standard
    deviation (computed from the data unless ``spectra_mean_std`` is given).
    ``__getitem__`` returns the normalised spectrum at the requested index,
    without targets and without augmentation.

    ``concentrations`` and ``concentration_mean_std`` are accepted for
    signature compatibility but are not used. The augmentation parameters are
    stored and only matter when ``augment_spectra`` / ``roll_spectrum`` /
    ``combine_k_items`` are called explicitly.

    Args:
        spectra: array-like of spectra; indexed below as
            ``(num_samples, channels, length)``.
        concentrations: unused.
        dtype: torch dtype of the stored tensors (default ``torch.float32``).
        spectra_mean_std: optional ``(mean, std)`` pair used for normalisation.
        concentration_mean_std: unused.
        combine_spectra_range, baseline_factor_bound,
        baseline_period_lower_bound, baseline_period_upper_bound,
        augment_slope_std, augment_intersept_std, rolling_bound,
        spectrum_rolling_sigma, augmentation_weight,
        original_datapoint_weight: augmentation settings, stored as attributes.
    """

    def __init__(
        self,
        spectra,
        concentrations,
        dtype=None,
        spectra_mean_std=None,
        concentration_mean_std=None,
        combine_spectra_range=0.0,
        baseline_factor_bound=0.0,
        baseline_period_lower_bound=100.0,
        baseline_period_upper_bound=200.0,
        augment_slope_std=0.0,
        augment_intersept_std=0.0,
        rolling_bound=0,
        spectrum_rolling_sigma=0.0,
        augmentation_weight=0.1,
        original_datapoint_weight=1.,
    ):
        self.dtype = dtype or torch.float32
        self.combine_spectra_range = combine_spectra_range
        self.baseline_factor_bound = baseline_factor_bound
        self.augment_slope_std = augment_slope_std
        self.augment_intercept_std = augment_intersept_std
        self.baseline_period_lower_bound = baseline_period_lower_bound
        self.baseline_period_upper_bound = baseline_period_upper_bound
        self.rolling_bound = rolling_bound
        self.spectrum_rolling_sigma = spectrum_rolling_sigma
        # Use the resolved dtype so that dtype=None behaves like float32
        # everywhere instead of falling back to dtype inference.
        self.augmentation_weight = torch.tensor(
            augmentation_weight, dtype=self.dtype
        )
        self.original_dp_weight = original_datapoint_weight

        # normalize spectra
        spectra = torch.tensor(spectra, dtype=self.dtype)

        if spectra_mean_std is None:
            self.s_mean = torch.mean(spectra)
            self.s_std = torch.std(spectra)
        else:
            self.s_mean, self.s_std = spectra_mean_std

        self.spectra = torch.divide(
            torch.subtract(spectra, self.s_mean),
            self.s_std,
        )

        # Evenly spaced pseudo wavenumbers in [0, 1), one row per channel;
        # only read by augment_spectra.
        self.dummy_wns = np.tile(
            np.arange(
                0., 1., 1. / self.spectra.shape[2],
                dtype=np_dtype_from_torch[self.dtype]
            )[None, :self.spectra.shape[2]],
            (self.spectra.shape[1], 1),
        )

    def pick_two(self, max_idx=None):
        """Draw two random indices (with replacement) below ``max_idx``."""
        max_idx = max_idx or len(self)
        return random.choices(range(max_idx), k=2)

    def __len__(self):
        """Number of stored spectra."""
        return self.spectra.shape[0]

    def augment_spectra(self, spectra):
        """Apply slope, periodic-baseline and roll augmentation to ``spectra``.

        Not called by ``__getitem__``; kept for explicit use.
        NOTE(review): this mixes torch tensors with the NumPy ``dummy_wns``
        array — confirm the expected input type before relying on it.
        """
        if self.augment_slope_std > 0.0:
            # Local import so the method does not depend on a notebook-level
            # `import scipy.optimize`.
            import scipy.optimize

            def spectrum_approximation(x, slope, intercept):
                return (slope * x + intercept).reshape(-1, 1)[:, 0]

            # Fit a straight line to the spectrum, then perturb its slope and
            # intercept and add the difference back onto the spectrum.
            slope, inter = scipy.optimize.curve_fit(
                spectrum_approximation,
                self.dummy_wns,
                spectra.reshape(-1, 1)[:, 0],
                p0=np.random.rand(2),
            )[0]

            new_slope = slope * (
                    np.random.gamma(
                        shape=1. / self.augment_slope_std,
                        scale=self.augment_slope_std,
                        size=1,
                    )
            )[0]
            new_intercept = inter * (
                1.0 + np.random.randn(1) * self.augment_intercept_std
            )[0]
            spectra += torch.tensor(
                (new_slope - slope)
            ) * self.dummy_wns + new_intercept - inter

        # Multiplicative cosine baseline with random amplitude, phase, period.
        factor = self.baseline_factor_bound * torch.rand(size=(1,))
        offset = torch.rand(size=(1,)) * 2.0 * torch.pi
        period = self.baseline_period_lower_bound + (
            self.baseline_period_upper_bound - self.baseline_period_lower_bound
        ) * torch.rand(size=(1,))
        permutations = factor * torch.cos(
            2.0 * torch.pi / period * self.dummy_wns + offset
        )
        return self.roll_spectrum(
            spectra + permutations * spectra,
            delta=random.randint(-self.rolling_bound, self.rolling_bound),
        )

    def roll_spectrum(self, spectra, delta):
        """Shift ``spectra`` by ``delta`` along axis 1 and refill the wrapped part.

        The wrapped-around entries are overwritten with the adjacent edge
        value multiplied by ``1 + U(0, 1) * spectrum_rolling_sigma``.
        """
        num_spectra = spectra.shape[0]
        rolled_spectra = np.roll(spectra, delta, axis=1)
        if delta > 0:
            rolled_spectra[:, :delta] = (
                np.random.rand(num_spectra, delta) * self.spectrum_rolling_sigma + 1
            ) * rolled_spectra[:, delta:(delta + 1)]
        elif delta < 0:
            rolled_spectra[:, delta:] = (
                np.random.rand(num_spectra, -delta) * self.spectrum_rolling_sigma + 1
            ) * rolled_spectra[:, delta - 1:delta]
        return rolled_spectra

    def combine_k_items(self, indices, weights):
        """Return a 1-tuple holding the weighted sum of the selected spectra.

        There are no concentrations in this dataset, so only spectra are mixed.
        """
        return (
            # spectra
            torch.sum(
                torch.mul(weights[:, None, None], self.spectra[indices, :, :]),
                dim=0,
            ),
        )

    def __getitem__(self, idx):
        """Return the normalised spectrum at ``idx`` (no target, no augmentation)."""
        return self.spectra[idx]
        
  
def get_test_dataset(inputs, inputs_mean_std, targets_mean_std):
    """Wrap ``inputs`` in a ``SpectralTestDataset`` with no concentrations.

    A singleton axis is inserted at position 1 of ``inputs`` before it is
    handed to the dataset. Baseline, slope and rolling settings are read from
    the module-level ``config``; the remaining settings are fixed literals.

    Args:
        inputs: 2-D spectra container supporting ``inputs[:, None, :]``.
        inputs_mean_std: passed through as ``spectra_mean_std``.
        targets_mean_std: passed through as ``concentration_mean_std``.

    Returns:
        The constructed ``SpectralTestDataset``.
    """
    dataset_kwargs = {
        "spectra": inputs[:, None, :],
        "concentrations": None,
        "dtype": torch.float32,
        "spectra_mean_std": inputs_mean_std,
        "concentration_mean_std": targets_mean_std,
        "combine_spectra_range": 1.0,
        "baseline_factor_bound": config["baseline_factor_bound"],
        "baseline_period_lower_bound": config["baseline_period_lower_bound"],
        # Upper bound is expressed as lower bound plus a span in the config.
        "baseline_period_upper_bound": (
            config["baseline_period_lower_bound"] + config["baseline_period_span"]
        ),
        "augment_slope_std": config["augment_slope_std"],
        "augment_intersept_std": 0.0,
        "rolling_bound": config["rolling_bound"],
        "spectrum_rolling_sigma": 0.01,
        "augmentation_weight": 0.1,
        "original_datapoint_weight": 1.,
    }
    return SpectralTestDataset(**dataset_kwargs)

In [21]:
# Collect every non-CSV file in /kaggle/working/ whose name contains "finetune";
# get_ckpt_paths also prints each checkpoint's stored epoch and score.
ckpt_paths = get_ckpt_paths("/kaggle/working/", "finetune")
ckpt_paths

/kaggle/working/finetune.baseline.resnet.F0.pt 92 0.8889310253438752
/kaggle/working/finetune.baseline.resnet.F1.pt 83 0.9428886696482195
/kaggle/working/finetune.baseline.resnet.F2.pt 62 0.9188509923921502
/kaggle/working/finetune.baseline.resnet.F3.pt 94 0.9387695188878328
/kaggle/working/finetune.baseline.resnet.F4.pt 65 0.9315321986159409
/kaggle/working/finetune.xception.F0.pt 93 0.8760090686615679
/kaggle/working/finetune.xception.F1.pt 97 0.8991505598176587
/kaggle/working/finetune.xception.F2.pt 81 0.8787869812044655
/kaggle/working/finetune.xception.F3.pt 94 0.8667093844091135
/kaggle/working/finetune.xception.F4.pt 77 0.8830788332910499
/kaggle/working/finetune.xception.resnet.F0.pt 59 0.7940462473673003


['/kaggle/working/finetune.baseline.resnet.F0.pt',
 '/kaggle/working/finetune.baseline.resnet.F1.pt',
 '/kaggle/working/finetune.baseline.resnet.F2.pt',
 '/kaggle/working/finetune.baseline.resnet.F3.pt',
 '/kaggle/working/finetune.baseline.resnet.F4.pt',
 '/kaggle/working/finetune.xception.F0.pt',
 '/kaggle/working/finetune.xception.F1.pt',
 '/kaggle/working/finetune.xception.F2.pt',
 '/kaggle/working/finetune.xception.F3.pt',
 '/kaggle/working/finetune.xception.F4.pt',
 '/kaggle/working/finetune.xception.resnet.F0.pt']

In [22]:
# Inspect the target statistics stored at index 1, dropping the first element
# (presumably fold 1's per-target mean and std — TODO confirm).
targets_mean_std[1][1:]

(tensor([6.8227, 1.1995, 1.6005]), tensor([2.9272, 0.5459, 0.6937]))

In [24]:
from torch.utils.data import DataLoader


def inference(test_inputs, ckpt_name, i):
    """Predict concentrations for ``test_inputs`` with a single checkpoint.

    The test spectra are wrapped with ``get_test_dataset`` using the statistics
    stored at index ``i`` of ``inputs_mean_std`` / ``targets_mean_std``, fed
    through a ``RamanXception`` model restored from ``ckpt_name``, and each
    output column is mapped back with ``reverse_zscore``.

    Args:
        test_inputs: spectra accepted by ``get_test_dataset``.
        ckpt_name: checkpoint path handed to ``get_ckpt``; its ``"state_dict"``
            must fit the ``RamanXception`` architecture built here.
        i: index selecting which entry of the mean/std collections to use
            (presumably the fold the checkpoint was trained on — verify).

    Returns:
        NumPy array of de-normalised predictions, one row per test spectrum.
    """
    num_targets = 3  # number of concentration columns that get de-normalised

    ckpt = get_ckpt(ckpt_name)

    input_stats = inputs_mean_std[i][1:]
    target_stats = targets_mean_std[i][1:]
    test_ds = get_test_dataset(test_inputs, input_stats, target_stats)
    test_dl = DataLoader(test_ds, batch_size=32)

    model = RamanXception(
        **model_config,
        classification_idx=3,
        num_concentrations=num_targets,
    ).to(device)

    model.load_state_dict(ckpt["state_dict"])
    model.eval()

    all_preds = []
    for inputs in test_dl:
        with torch.inference_mode():
            # Send the batch to the same device as the model instead of
            # unconditionally calling .cuda().
            preds = model(inputs.to(device))
            preds = preds.double()
            all_preds.append(cuda_to_np(preds))

    preds = np.concatenate(all_preds)
    mus = target_stats[0]
    sigmas = target_stats[1]

    # A dedicated loop variable keeps the `i` argument intact.
    for col in range(num_targets):
        preds[:, col] = reverse_zscore(preds[:, col], mus[col].numpy(), sigmas[col].numpy())

    return preds

# The last argument selects which inputs_mean_std / targets_mean_std entry is
# used; presumably it has to match the fold in the checkpoint name (F1 -> 1).
preds = inference(test_inputs, "/kaggle/working/finetune.xception.F1.pt", 1) # CAREFUL ABOUT INDEX
generate_csv(preds, "/kaggle/working/finetune.xception.F1.pt.89.csv")
preds

array([[ 3.66954874,  0.78885008,  0.66945825],
       [ 6.02439293,  1.72374044,  1.86294906],
       [ 4.73387718,  0.66746245,  1.11429106],
       [ 3.36186427,  0.95707884,  0.60138372],
       [10.74335563,  1.08469177,  1.04640487],
       [ 7.79149424,  1.85122653,  0.87439316],
       [ 6.45080018,  0.73214387,  0.4693115 ],
       [ 6.17747787,  2.04762333,  1.14821508],
       [ 6.78303218,  1.93231102,  1.18067036],
       [ 9.51890954,  0.78413444,  0.33448671],
       [ 9.3611771 ,  0.82066024,  1.16067062],
       [ 2.83731915,  1.2601258 ,  0.96025776],
       [ 4.37750159,  1.13258094,  1.13889522],
       [ 4.98539773,  0.97945887,  1.86429871],
       [ 4.52354546,  1.22185777,  1.09667284],
       [ 8.81121107,  0.7327261 ,  0.87053035],
       [ 5.44735984,  0.92049363,  1.02608077],
       [ 5.73599372,  1.14094102,  1.15152851],
       [ 6.62377178,  1.38141356,  1.19645389],
       [ 4.02276778,  0.86357384,  1.19953272],
       [ 5.91987985,  0.86746868,  0.871

In [25]:
# Compare the value range of the targets with that of the predictions.
# get_stats prints its summary and returns None, hence the (None, None) output.
get_stats(targets, minmax=True), get_stats(preds, minmax=True)

Min: 0.276526487, Max: 11.88990894, Mean: 3.208722795402778, Std: 3.1291512817697695
Min: 0.22718540335807802, Max: 10.743355628477673, Mean: 2.607027186618872, Std: 2.4520468249081184


(None, None)

In [None]:
# Same comparison as the previous cell; the saved output below differs from
# that cell's, so it was produced with a different `preds`.
get_stats(targets, minmax=True), get_stats(preds, minmax=True)

Min: 0.276526487, Max: 11.88990894, Mean: 3.208722795402778, Std: 3.1291512817697695
Min: 0.21520479832992123, Max: 9.941416729453636, Mean: 2.220094471803795, Std: 1.9768473694491508


(None, None)

In [None]:
def _fold_index_from_path(ckpt_path, default):
    """Return the fold number from an ``F<k>`` token in the file name, else ``default``."""
    for token in reversed(os.path.basename(ckpt_path).split(".")):
        if len(token) > 1 and token[0] == "F" and token[1:].isdigit():
            return int(token[1:])
    return default


def ensemble_inference(ckpt_paths):
    """Average the de-normalised predictions of several ``ReZeroNet`` checkpoints.

    For each checkpoint the test data is normalised with the statistics of the
    fold it belongs to, predictions are mapped back with ``reverse_zscore``,
    and the per-checkpoint results are averaged element-wise.

    The fold is taken from an ``F<k>`` token in the checkpoint file name
    (e.g. ``finetune.xception.F1.pt`` -> 1). If the name has no such token, the
    position in ``ckpt_paths`` is used, which was the previous behaviour. Using
    the position alone breaks as soon as the list holds more than one
    checkpoint per fold.

    Args:
        ckpt_paths: iterable of checkpoint paths; every checkpoint's
            ``"state_dict"`` must fit ``ReZeroNet(**config)``.

    Returns:
        NumPy array holding the mean prediction over all checkpoints.

    Raises:
        ValueError: if ``ckpt_paths`` is empty.
    """
    ckpt_paths = list(ckpt_paths)
    if not ckpt_paths:
        raise ValueError("ensemble_inference needs at least one checkpoint path")

    num_targets = 3  # number of concentration columns that get de-normalised
    test_inputs = get_test_data()
    all_preds = []

    for position, ckpt_path in enumerate(ckpt_paths):
        fold = _fold_index_from_path(ckpt_path, default=position)
        ckpt = get_ckpt(ckpt_path)

        model = ReZeroNet(**config).to(device)
        model.load_state_dict(ckpt["state_dict"])
        model.eval()

        target_stats = targets_mean_std[fold][1:]
        test_ds = get_test_dataset(test_inputs, inputs_mean_std[fold][1:], target_stats)
        test_dl = DataLoader(test_ds, batch_size=32)

        fold_preds = []
        for inputs in test_dl:
            with torch.inference_mode():
                # Keep the batch on the same device as the model.
                batch_preds = model(inputs.to(device))
                fold_preds.append(cuda_to_np(batch_preds.double()))

        fold_preds = np.concatenate(fold_preds)

        means = target_stats[0]
        stds = target_stats[1]
        # A separate column variable avoids clobbering the checkpoint index.
        for col in range(num_targets):
            fold_preds[:, col] = reverse_zscore(
                fold_preds[:, col], means[col].numpy(), stds[col].numpy()
            )

        all_preds.append(fold_preds)

    return np.mean(all_preds, axis=0)

# Average the predictions of every checkpoint in ckpt_paths, hand the result
# to generate_csv, and display it.
preds = ensemble_inference(ckpt_paths)
generate_csv(preds, "paper.finetune.avg.pretrain.weights.ensemble.csv")
preds