In [None]:
!pip3 install -q  neptune

In [None]:
import random
import torch
import numpy as np


def setup_reproducibility(seed):
    """Seed the RNGs used by the notebook and configure cuDNN / matmul settings.

    Args:
        seed: Integer seed applied to Python's ``random``, NumPy and PyTorch
            (and to every CUDA device when CUDA is available).
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # cuDNN: pick deterministic kernels and disable the auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # NOTE(review): deterministic algorithms are switched OFF here (mode=False),
    # so `warn_only=True` presumably has no effect - confirm whether
    # mode=True was intended.
    torch.use_deterministic_algorithms(False, warn_only=True)
    torch.set_float32_matmul_precision("high")
    
# Global seed: used for RNG setup here and later for the data splits and loaders.
SEED = 1000
setup_reproducibility(SEED)

In [None]:
from scipy import signal
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from huggingface_hub import login, snapshot_download
from tqdm.auto import tqdm


def get_stats(tensor, p=True, r=False):
    """Summarise a tensor/array by its minimum, maximum, mean and standard deviation.

    Args:
        tensor: Object exposing ``mean()``, ``std()``, ``min()`` and ``max()``
            methods (the notebook passes both torch tensors and NumPy arrays).
        p: When true, print the four statistics.
        r: When true, return the four statistics.

    Returns:
        ``(min, max, mean, std)`` when ``r`` is true, otherwise ``None``.
    """
    mean, std = tensor.mean(), tensor.std()
    # Local names chosen so the `min` / `max` builtins are not shadowed.
    lo, hi = tensor.min(), tensor.max()

    if p:
        print(f"Min: {lo}, Max: {hi}, Mean: {mean}, Std: {std}")
    if r:
        return lo, hi, mean, std
    return None
    
    
def zscore(tensor, mean=None, std=None):
    """Standardise ``tensor`` as ``(tensor - mean) / (std + 1e-8)``.

    Args:
        tensor: Values to standardise; must support arithmetic and, when the
            statistics are omitted, ``mean()`` / ``std()``.
        mean: Centre to subtract; computed from ``tensor`` when ``None``.
        std: Scale to divide by; computed from ``tensor`` when ``None``.

    Returns:
        The standardised values (the 1e-8 term guards against division by zero).
    """
    center = tensor.mean() if mean is None else mean
    scale = tensor.std() if std is None else std
    return (tensor - center) / (scale + 1e-8)


def get_model_size(model):
    """Print the model's total parameter count, expressed in millions."""
    n_params = 0
    for param in model.parameters():
        n_params += param.numel()
    print(n_params / 1e6)
    

def get_index(iterable):
    """Return a random valid position in ``iterable`` (anything supporting ``len``)."""
    last_position = len(iterable) - 1
    return random.randint(0, last_position)


def get_indices(iterable, n):
    """Return ``n`` distinct random positions of ``iterable``, sampled without replacement."""
    positions = range(len(iterable))
    return random.sample(positions, n)


def split(inputs, targets, seed):
    """Shuffle ``inputs`` / ``targets`` together and hold out 20% of the samples.

    Args:
        inputs: Samples to split.
        targets: Targets aligned with ``inputs``.
        seed: Random state handed to ``train_test_split``.

    Returns:
        The result of ``train_test_split``: train inputs, held-out inputs,
        train targets, held-out targets (in that order).
    """
    options = dict(test_size=0.2, shuffle=True, random_state=seed)
    return train_test_split(inputs, targets, **options)


def show_waves(waves, dpi=100):
    """Plot every row of ``waves`` in its own wide figure.

    Args:
        waves: 2-D array of shape ``(n_waves, N)``; one figure is created per row.
        dpi: Resolution passed to each figure.
    """
    n_samples = waves.shape[1]
    sample_axis = np.arange(n_samples)

    for number, wave in enumerate(waves, start=1):
        # 14x4 inches: wide and shallow so the trace uses the full cell width.
        fig = plt.figure(figsize=(14, 4), dpi=dpi)
        ax = fig.add_subplot(111)
        ax.plot(sample_axis, wave, linewidth=1)
        ax.set_title(f"Wave {number}")
        ax.set_xlabel("Sample")
        ax.set_ylabel("Amplitude")
        ax.grid(True)
        fig.tight_layout()  # trim the margins

    plt.show()
    
    
def hf_ds_download(hf_token, repo_id):
    """Log in to the Hugging Face Hub and download a dataset repository snapshot.

    Args:
        hf_token: Token string; its first character is dropped before login.
            NOTE(review): presumably the stored token carries a one-character
            prefix - confirm with the caller.
        repo_id: Identifier of the dataset repository to download.

    Returns:
        Whatever ``snapshot_download`` returns for the repository.
    """
    token = hf_token[1:]
    login(token)
    snapshot = snapshot_download(repo_id, repo_type="dataset")
    return snapshot


def get_spectra_features(X, b=False):
    """Build derivative feature channels from a batch of spectra.

    A cubic polynomial baseline (fitted against the sample index) is subtracted
    from each spectrum, then Savitzky-Golay first and second derivatives
    (window 11, polynomial order 3) are taken along the spectral axis.

    Args:
        X: 2-D array of shape ``(n_spectra, n_points)``.
        b: When true, the baseline-corrected spectrum is included as the first
            channel in addition to the two derivatives.

    Returns:
        Array of shape ``(n_spectra, 3, n_points)`` when ``b`` is true, otherwise
        ``(n_spectra, 2, n_points)`` holding only the derivatives.
    """
    X = np.asarray(X)
    # Keep floating inputs in their own dtype, but never allocate an integer
    # buffer: assigning the corrected spectrum into it would truncate the values.
    out_dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64
    X_processed = np.zeros_like(X, dtype=out_dtype)

    # The fitting abscissa is identical for every spectrum, so build it once.
    positions = np.arange(X.shape[1])

    # Baseline correction only; standard-normal-variate scaling is not applied.
    for i in tqdm(range(X.shape[0])):
        poly = np.polyfit(positions, X[i], 3)
        baseline = np.polyval(poly, positions)
        X_processed[i] = X[i] - baseline

    # Calculate derivatives
    deriv1 = signal.savgol_filter(X_processed, window_length=11, polyorder=3, deriv=1, axis=1)
    deriv2 = signal.savgol_filter(X_processed, window_length=11, polyorder=3, deriv=2, axis=1)

    if b:
        return np.stack([X_processed, deriv1, deriv2], axis=1)
    return np.stack([deriv1, deriv2], axis=1)

In [None]:
import os

# Root directory of the competition data.
path = "/kaggle/input/dig-4-bio-raman-transfer-learning-challenge"
# NOTE(review): os.listdir returns entries in arbitrary order, yet later cells
# select files by position (files[1], files[5], ...) - confirm the indices
# against the listing displayed below.
files = os.listdir(path)
list(enumerate(files))

In [None]:
import pandas as pd


def load_transfer_data():
    """Load the transfer CSV (``files[5]``) as averaged spectra plus targets.

    Reads the module-level ``path`` and ``files``. Columns 1..2048 are taken as
    spectral inputs and columns 2050 onward as targets; target rows containing
    NaN are dropped. Each consecutive pair of input rows is averaged.

    Returns:
        ``(inputs, targets)`` where ``inputs`` has shape ``(n_rows / 2, 2048)``
        and ``targets`` is the NaN-free target array.
    """
    csv_path = os.path.join(path, files[5])
    df = pd.read_csv(csv_path)

    input_cols = df.columns[1:2049]
    target_cols = df.columns[2050:]

    targets = df[target_cols].dropna().to_numpy()

    # Work on an explicit copy so the column assignments below do not act on a
    # slice of `df` (avoids pandas' SettingWithCopyWarning).
    spectra = df[input_cols].copy()

    # The first and last spectral columns are strings carrying square brackets;
    # strip them and convert to integers. A raw string keeps the regex free of
    # invalid escape sequences.
    for col in ("Unnamed: 1", "Unnamed: 2048"):
        spectra[col] = spectra[col].str.replace(r"[\[\]]", "", regex=True).astype("int64")

    # Group rows in pairs and average each pair.
    inputs = spectra.to_numpy().reshape(-1, 2, 2048)
    inputs = inputs.mean(axis=1)

    return inputs, targets


def load_test_data():
    """Load the test CSV (``files[6]``) and return pair-averaged spectra.

    Reads the module-level ``path`` and ``files``. The column labels are treated
    as the first data row (presumably the CSV has no header line - confirm) and
    are re-inserted ahead of the parsed rows.

    Returns:
        Array of shape ``(n_rows / 2, 2048)``: the mean of each consecutive pair
        of rows.
    """
    test = pd.read_csv(os.path.join(path, files[6]))

    # Recover the first spectrum from the column labels (first column skipped).
    # NOTE(review): pandas renames duplicated labels (e.g. "12" -> "12.1"), which
    # would alter values recovered this way - confirm against the raw file.
    row1 = test.columns[1:].to_numpy().copy()
    row1[-1] = "5611"  # last label is " 5611]"; replace it with the bare number
    row1 = row1.astype(np.float64)

    cols = test.columns[1:]
    # Explicit copy so the column assignment below does not act on a slice.
    test = test[cols].copy()
    # Raw string keeps the regex free of invalid escape sequences.
    test[" 5611]"] = test[" 5611]"].str.replace(r"[\[\]]", "", regex=True).astype("int64")
    test = test.to_numpy()

    test = np.insert(test, 0, row1, axis=0)
    return test.reshape(-1, 2, 2048).mean(axis=1)


def _load_instrument_csv(file_index):
    """Read ``files[file_index]`` and split it into ``(inputs, targets)`` arrays."""
    frame = pd.read_csv(os.path.join(path, files[file_index]))
    # Bookkeeping columns are not features; drop them without mutating in place.
    frame = frame.drop(columns=["fold_idx", "MSM_present"])
    inputs = frame[frame.columns[:-3]].to_numpy()
    targets = frame[frame.columns[-3:]].to_numpy()
    return inputs, targets


def load_all_datasets():
    """Load every instrument dataset as parallel lists of inputs and targets.

    Reads the module-level ``path`` and ``files``. For each CSV the
    ``fold_idx`` and ``MSM_present`` columns are removed, the last three
    remaining columns become the targets and everything before them the inputs.

    Returns:
        ``(train_inputs, train_targets)``: two lists with one NumPy array per
        dataset, in the order listed in ``file_indices``.
    """
    # Positions in `files`, in the original loading order: timegate,
    # mettler_toledo, kaiser, anton, tornado, tec5, metrohm, anton785.
    file_indices = (1, 2, 3, 4, 7, 8, 9, 10)

    train_inputs = []
    train_targets = []
    for file_index in file_indices:
        inputs, targets = _load_instrument_csv(file_index)
        train_inputs.append(inputs)
        train_targets.append(targets)

    return train_inputs, train_targets

# Parallel lists: one inputs array and one targets array per instrument CSV.
inputs_list, targets_list = load_all_datasets()

In [None]:
# Keep 96 randomly chosen rows (sampled without replacement) from every dataset.
# The lists are overwritten in place, so re-running this cell resamples from
# the already reduced data.
for i, (dataset_inputs, dataset_targets) in enumerate(zip(inputs_list, targets_list)):
    indices = get_indices(dataset_inputs, 96)
    inputs_list[i] = dataset_inputs[indices]
    targets_list[i] = dataset_targets[indices]

In [None]:
# Replace each dataset's spectra with its derivative feature channels, then
# display the resulting shapes.
inputs_list = [get_spectra_features(spectra) for spectra in inputs_list]
[features.shape for features in inputs_list]

In [None]:
import torch.nn.functional as F

def foo(inputs, targets):
    """Resample every dataset to 2048 points and concatenate them.

    Args:
        inputs: Sequence of 3-D arrays ``(n_samples, channels, length)``; each is
            converted to a tensor and resampled along the last axis with
            ``nearest-exact`` interpolation.
        targets: Sequence of target arrays, one per entry of ``inputs``.

    Returns:
        ``(inputs, targets)``: two tensors, each the concatenation of the
        per-dataset tensors along dimension 0.
    """
    # Build a new list instead of assigning back into `inputs`, so the caller's
    # list is left untouched and the call can safely be repeated.
    resampled = [
        F.interpolate(torch.tensor(x), size=2048, mode="nearest-exact")
        for x in inputs
    ]
    target_tensors = [torch.tensor(t) for t in targets]
    return torch.cat(resampled), torch.cat(target_tensors)
    
# Merge all instrument datasets into one inputs tensor and one targets tensor,
# then display their shapes.
inputs, targets = foo(inputs_list, targets_list)
inputs.shape, targets.shape

In [None]:
import torch.nn.functional as F


# Disabled cell: the `if False:` guard means nothing below is executed.
# When enabled it splits every dataset into train/eval parts and merges them.
if False:
    # Replace each dataset by a (train, eval) pair for inputs and for targets.
    for i in range(len(inputs_list)):
        inputs = inputs_list[i]
        targets = targets_list[i]
        train_inputs, eval_inputs, train_targets, eval_targets = split(inputs, targets, SEED)
        
        inputs_list[i] = (train_inputs, eval_inputs)
        targets_list[i] = (train_targets, eval_targets)
        
    def foo(inputs, targets):
        # Resample both parts of every dataset to 2048 points (in place in `inputs`).
        for i in range(len(inputs)):
            train_inputs, eval_inputs = inputs[i]
            train_inputs = torch.tensor(train_inputs)
            eval_inputs = torch.tensor(eval_inputs)
            train_inputs = F.interpolate(train_inputs, size=2048, mode="nearest-exact")
            eval_inputs = F.interpolate(eval_inputs, size=2048, mode="nearest-exact")   
            inputs[i] = (train_inputs, eval_inputs)

        # Concatenate the train parts and the eval parts across datasets.
        train_inputs = [i[0] for i in inputs]
        eval_inputs = [i[1] for i in inputs]
        train_inputs = torch.cat(train_inputs)
        eval_inputs = torch.cat(eval_inputs)
        
        train_targets = [torch.tensor(t[0]) for t in targets]
        eval_targets = [torch.tensor(t[1]) for t in targets]
        train_targets = torch.cat(train_targets)
        eval_targets = torch.cat(eval_targets)

        return train_inputs, eval_inputs, train_targets, eval_targets
        
    train_inputs, eval_inputs, train_targets, eval_targets = foo(inputs_list, targets_list)
    train_inputs.shape, eval_inputs.shape, train_targets.shape, eval_targets.shape

In [None]:
import torch.nn.functional as F


def foo(inputs, targets):
    """Resample (train, eval) dataset pairs to 2048 points and concatenate them.

    This definition replaces the ``foo`` defined in an earlier cell.

    Args:
        inputs: Sequence of ``(train_inputs, eval_inputs)`` pairs of 3-D arrays
            ``(n_samples, channels, length)``.
        targets: Sequence of ``(train_targets, eval_targets)`` pairs.

    Returns:
        ``(train_inputs, eval_inputs, train_targets, eval_targets)`` tensors,
        each concatenated across datasets along dimension 0.
    """
    # Collect into fresh lists instead of assigning back into `inputs`, so the
    # caller's list is left untouched and the call can safely be repeated.
    train_parts, eval_parts = [], []
    for train_x, eval_x in inputs:
        train_parts.append(
            F.interpolate(torch.tensor(train_x), size=2048, mode="nearest-exact")
        )
        eval_parts.append(
            F.interpolate(torch.tensor(eval_x), size=2048, mode="nearest-exact")
        )

    train_inputs = torch.cat(train_parts)
    eval_inputs = torch.cat(eval_parts)

    train_targets = torch.cat([torch.tensor(pair[0]) for pair in targets])
    eval_targets = torch.cat([torch.tensor(pair[1]) for pair in targets])

    return train_inputs, eval_inputs, train_targets, eval_targets
    
# NOTE(review): this call unpacks every inputs_list entry as a (train, eval)
# pair, but the only visible code that builds such pairs sits in the cell
# guarded by `if False:` - confirm whether this cell still runs on a fresh kernel.
train_inputs, eval_inputs, train_targets, eval_targets = foo(inputs_list, targets_list)
train_inputs.shape, eval_inputs.shape, train_targets.shape, eval_targets.shape

In [None]:
# Load the transfer set, turn it into derivative features and hold out 20% of
# the samples for evaluation.
transfer_inputs, transfer_targets = load_transfer_data()
transfer_inputs = get_spectra_features(transfer_inputs)
(
    train_transfer_inputs,
    eval_transfer_inputs,
    train_transfer_targets,
    eval_transfer_targets,
) = split(transfer_inputs, transfer_targets, SEED)

In [None]:
# Append the transfer split to the multi-instrument train/eval tensors.
# NOTE(review): the following cell reassigns all four names from the transfer
# split alone, discarding this concatenation - confirm which variant is intended.
train_inputs = torch.cat([train_inputs, torch.tensor(train_transfer_inputs)])
eval_inputs = torch.cat([eval_inputs, torch.tensor(eval_transfer_inputs)])
train_targets = torch.cat([train_targets, torch.tensor(train_transfer_targets)])
eval_targets = torch.cat([eval_targets, torch.tensor(eval_transfer_targets)])

In [None]:
# Use the transfer split alone as the train/eval data (overwrites any earlier
# values bound to these four names).
train_inputs = torch.tensor(train_transfer_inputs)
eval_inputs = torch.tensor(eval_transfer_inputs)
train_targets = torch.tensor(train_transfer_targets)
eval_targets = torch.tensor(eval_transfer_targets)

In [None]:
# Standardise with statistics of the TRAINING inputs only. `mu` / `sigma` are
# reused later for the test set, so the eval set must be scaled with the same
# values rather than with its own mean/std. The unused minimum/maximum are bound
# to throwaway names so the `min` / `max` builtins are not shadowed.
_train_min, _train_max, mu, sigma = get_stats(train_inputs, r=True)
train_inputs = zscore(train_inputs, mu, sigma)
eval_inputs = zscore(eval_inputs, mu, sigma)
get_stats(train_inputs), get_stats(eval_inputs)

In [None]:
from torch.utils.data import TensorDataset

# Wrap inputs/targets as float32 datasets and display their sizes.
train_ds = TensorDataset(train_inputs.float(), train_targets.float())
eval_ds = TensorDataset(eval_inputs.float(), eval_targets.float())
len(train_ds), len(eval_ds)

In [None]:
from torch.utils.data import DataLoader


def build_loader(
    SEED,
    ds,
    train=True,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    drop_last=True,
    pin_memory=True,
    persistent_workers=False,
):
    """Create a ``DataLoader`` whose shuffling and workers are seeded.

    Args:
        SEED: Base seed for the loader's ``torch.Generator``.
        ds: Dataset to load from.
        train: When false, the generator is seeded with ``SEED + 5232`` instead
            of ``SEED``.
        batch_size, shuffle, num_workers, drop_last, pin_memory,
        persistent_workers: Forwarded unchanged to ``DataLoader``.

    Returns:
        The configured ``DataLoader``.
    """
    def seed_worker(worker_id):
        # Seed NumPy and `random` inside a worker from that worker's torch seed.
        worker_seed = torch.initial_seed() % 2**32
        np.random.seed(worker_seed)
        random.seed(worker_seed)

    loader_seed = SEED if train else SEED + 5232
    generator = torch.Generator()
    generator.manual_seed(loader_seed)

    loader_options = dict(
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
        drop_last=drop_last,
        persistent_workers=persistent_workers,
        worker_init_fn=seed_worker,
        generator=generator,
    )
    return DataLoader(ds, **loader_options)
    
    
def return_dls(train_ds, eval_ds, train_batch_size, eval_batch_size):
    """Build the training and evaluation loaders from the global ``SEED``.

    Both loaders run in the main process (``num_workers=0``), keep the last
    partial batch and pin memory; only the training loader shuffles.

    Returns:
        ``(train_dl, eval_dl)``.
    """
    shared_options = dict(
        num_workers=0,
        drop_last=False,
        pin_memory=True,
        persistent_workers=False,
    )

    train_dl = build_loader(
        SEED, train_ds, train=True, batch_size=train_batch_size, shuffle=True, **shared_options
    )
    eval_dl = build_loader(
        SEED, eval_ds, train=False, batch_size=eval_batch_size, shuffle=False, **shared_options
    )

    return train_dl, eval_dl

In [None]:
import neptune


def setup_neptune():
    """Create (or resume) the Neptune run used for experiment tracking.

    Reads the module-level ``RESUME``, ``MODEL_NAME``, ``SEED``, ``LR``, ``WD``,
    ``EPOCHS`` and ``BATCH_SIZE``; ``DROPOUT`` and ``DROP_PATH_RATE`` are logged
    only when they are defined and truthy.

    The API token is taken from the ``NEPTUNE_API_TOKEN`` environment variable.
    It must not be hard-coded in the notebook; a token that was previously
    committed here should be treated as leaked and revoked.

    Returns:
        The Neptune run object returned by ``neptune.init_run``.
    """
    api_token = os.environ.get("NEPTUNE_API_TOKEN")

    if not RESUME:
        neptune_run = neptune.init_run(
            project="arbaaz/kaggle-spect",
            name=MODEL_NAME,
            api_token=api_token,
        )

        h_parameters = {
            "seed": SEED,
            "model_name": MODEL_NAME,
            "optimizer_name": "nadam",
            "learning_rate": LR,
            "scheduler_name": "default",
            "weight_decay": WD,
            "num_epochs": EPOCHS,
            "batch_size": BATCH_SIZE,
        }
        # Optional hyperparameters: looked up defensively so that a constant
        # that is commented out in the config cell does not raise NameError.
        dropout = globals().get("DROPOUT")
        drop_path_rate = globals().get("DROP_PATH_RATE")
        if dropout:
            h_parameters["dropout"] = dropout
        if drop_path_rate:
            h_parameters["drop_path_rate"] = drop_path_rate

        # Single assignment, so no earlier field can be clobbered by a later one.
        neptune_run["h_parameters"] = h_parameters
    else:
        # NOTE(review): this branch targets a different project and reads
        # `config.with_id`, but no `config` object is defined in the visible
        # notebook - confirm both before relying on RESUME=True.
        neptune_run = neptune.init_run(
            project="arbaaz/crunchdao-structural-break",
            with_id=config.with_id,
            api_token=api_token,
        )

    return neptune_run

In [None]:
import torch.nn.functional as F
from sklearn.metrics import r2_score


def loss_fn(logits, targets):
    """Mean squared error between predictions and targets over all elements.

    Both tensors are flattened with ``view(-1)`` before the loss is computed.
    """
    flat_logits, flat_targets = logits.view(-1), targets.view(-1)
    return F.mse_loss(flat_logits, flat_targets)


def metric_fn(logits, targets):
    """Compute the R^2 score of each of the first three target columns.

    Args:
        logits: Prediction tensor with at least three columns.
        targets: Ground-truth tensor of matching layout.

    Returns:
        ``(r2_col0, r2_col1, r2_col2, mean_r2)`` where ``mean_r2`` is the
        arithmetic mean of the three column scores.
    """
    preds = logits.cpu().detach().numpy()
    truth = targets.cpu().detach().numpy()

    scores = [r2_score(truth[:, col], preds[:, col]) for col in range(3)]
    mean_r2 = (scores[0] + scores[1] + scores[2]) / 3

    return scores[0], scores[1], scores[2], mean_r2

In [None]:
import torch.nn as nn


class ResidualBlock(nn.Module):
    """Residual block: two Conv1d + BatchNorm1d stages with a skip connection.

    The skip path is an empty ``nn.Sequential`` unless the stride or the channel
    count changes, in which case it is a 1x1 convolution followed by batch norm.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1):
        super().__init__()
        same_padding = kernel_size // 2

        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, stride, padding=same_padding)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.elu = nn.ELU()
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, padding=same_padding)
        self.bn2 = nn.BatchNorm1d(out_channels)

        needs_projection = stride != 1 or in_channels != out_channels
        if needs_projection:
            # Match the main path's shape so the two can be summed.
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm1d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``ELU(main_path(x) + shortcut(x))``."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.elu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        return self.elu(out)

class ResNet(nn.Module):
    """A deeper ResNet-style 1D CNN for Raman spectra.

    Architecture: stem (Conv1d k=7 s=2, BatchNorm, GELU, MaxPool), four stages of
    two residual blocks each (64, 128, 256, 512 channels), global average
    pooling and a two-layer head.

    Args:
        input_channels: Number of channels of the input signal.
        num_classes: Size of the final linear layer's output.
        dropout: Dropout probability used in the head. Defaults to 0.5, the
            value that was previously hard-coded.
    """
    def __init__(self, input_channels=3, num_classes=3, dropout=0.5):
        super().__init__()
        # Running channel count; updated by _make_layer as stages are built.
        self.in_channels = 64
        self.conv1 = nn.Conv1d(input_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm1d(64)
        # The attribute is named `elu` but holds a GELU activation; the name is
        # kept so existing references to `model.elu` keep working.
        self.elu = nn.GELU()
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(64, 2, stride=1)
        self.layer2 = self._make_layer(128, 2, stride=2)
        self.layer3 = self._make_layer(256, 2, stride=2)
        self.layer4 = self._make_layer(512, 2, stride=2)

        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.ELU(),
            nn.Dropout(dropout),
            nn.Linear(256, num_classes)
        )

    def _make_layer(self, out_channels, num_blocks, stride):
        """Build one stage: the first block applies ``stride``, the rest use 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for s in strides:
            layers.append(ResidualBlock(self.in_channels, out_channels, stride=s))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map ``(batch, input_channels, length)`` to ``(batch, num_classes)``."""
        x = self.elu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        # Drop the pooled length-1 axis before the linear head.
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [None]:
from tqdm.auto import tqdm


def train(
    model, 
    optimizer,
    device,
    scaler, 
    scheduler,
    train_dl,
    eval_dl,
    epochs,
    checkpoint_name,
    score=-float("inf"),
    neptune_run=None,
    p=True,
):  
    """Train ``model`` for ``epochs`` epochs, evaluating after every epoch.

    Args:
        model: Network to optimise.
        optimizer: Optimiser stepped through ``scaler``.
        device: Device string; used to move batches and as autocast ``device_type``.
        scaler: Gradient scaler providing ``scale`` / ``step`` / ``update``.
        scheduler: Learning-rate scheduler, stepped once per training batch.
        train_dl: Loader yielding ``(inputs, targets)`` training batches.
        eval_dl: Loader yielding ``(inputs, targets)`` evaluation batches.
        epochs: Number of epochs to run.
        checkpoint_name: File name written under ``/kaggle/working/``.
        score: Best evaluation mean R^2 so far; a checkpoint is written only
            when an epoch exceeds it.
        neptune_run: Optional Neptune run used for logging; stopped at the end.
        p: When true, print the metrics of every epoch.
    """
    for epoch in tqdm(range(epochs)):
        model.train()
        total_loss = 0.0
        all_logits = []
        all_targets = []
        
        for inputs, targets in train_dl:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            
            # Forward pass and loss under float16 autocast.
            with torch.amp.autocast(device_type=device, dtype=torch.float16, cache_enabled=True):
                logits = model(inputs)
                loss = loss_fn(logits, targets)
            
            # Scaled backward pass, optimiser step, then reset of the gradients.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            scheduler.step()
            if neptune_run is not None:  neptune_run["lr_step"].append(scheduler.get_last_lr()[0])
            
            # Keep CPU copies so the epoch metrics cover every batch.
            total_loss += loss.detach().cpu()
            all_logits.append(logits.detach().cpu())
            all_targets.append(targets.detach().cpu())
        
        all_logits = torch.cat(all_logits)
        all_targets = torch.cat(all_targets)

        # Per-target R^2 and their mean over the whole training epoch.
        one, two, three, r2 = metric_fn(all_logits, all_targets)
        total_loss = total_loss / len(train_dl)
        
        model.eval()
        eval_total_loss = 0.0
        eval_all_logits = []
        eval_all_targets = []

        for inputs, targets in eval_dl:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            # Evaluation runs without autocast (the autocast line is commented out).
            with torch.inference_mode():
                #with torch.amp.autocast(device_type=device, dtype=torch.float16, cache_enabled=True):
                logits = model(inputs)
                loss = loss_fn(logits, targets)

            eval_total_loss += loss.detach().cpu()
            eval_all_logits.append(logits.detach().cpu())
            eval_all_targets.append(targets.detach().cpu())
        
        eval_all_logits = torch.cat(eval_all_logits)
        eval_all_targets = torch.cat(eval_all_targets)

        eval_one, eval_two, eval_three, eval_r2 = metric_fn(eval_all_logits, eval_all_targets)
        eval_total_loss = eval_total_loss / len(eval_dl)
        
        # Checkpoint only when the evaluation mean R^2 improves on the best so far.
        if eval_r2 > score:
            score = eval_r2
            data = {"state_dict": model.state_dict()}
            data["epoch"] = epoch 
            data["score"] = score
            torch.save(data, f"/kaggle/working/{checkpoint_name}")
        
        if neptune_run is not None:
            neptune_run["train/loss"].append(total_loss)
            neptune_run["eval/loss"].append(eval_total_loss)
            neptune_run["train/r2"].append(r2)
            neptune_run["eval/r2"].append(eval_r2)
            neptune_run["train/one"].append(one)
            neptune_run["train/two"].append(two)
            neptune_run["train/three"].append(three)
            neptune_run["eval/one"].append(eval_one)
            neptune_run["eval/two"].append(eval_two)
            neptune_run["eval/three"].append(eval_three)
            
        if p:
            print(
                f"Epoch: {epoch}, "
                f"train/loss: {total_loss:.4f}, "
                f"eval/loss: {eval_total_loss:.4f}, "
                f"train/r2: {r2:.4f}, "
                f"eval/r2: {eval_r2:.4f}, "
                f"train/one: {one:.4f}, "
                f"train/two: {two:.4f}, "
                f"train/three: {three:.4f}, "
                f"eval/one: {eval_one:.4f}, "
                f"eval/two: {eval_two:.4f}, "
                f"eval/three: {eval_three:.4f} "
            )
            
    # Close the Neptune run once all epochs have finished.
    if neptune_run is not None: neptune_run.stop()

In [None]:
import random
import warnings
from transformers import get_cosine_schedule_with_warmup
from tqdm.auto import tqdm

#warnings.filterwarnings("ignore")

# --- Hyperparameters -------------------------------------------------------
MODEL_NAME = "ResNet.Finetune"
EPOCHS = 500
BATCH_SIZE = len(train_ds)  # one batch per epoch: the whole training set
WD = 1e-3
LR = 1e-4
# NOTE(review): DROPOUT is only logged to Neptune; the model built below is not
# passed this value - confirm whether it should be wired into ResNet.
DROPOUT = 0.5
# Defined (as a falsy 0.0) because setup_neptune() reads DROP_PATH_RATE; leaving
# it commented out raises NameError on a fresh kernel.
DROP_PATH_RATE = 0.0
device = "cuda" if torch.cuda.is_available() else "cpu"
checkpoint_name = "pretrain.pt"
RESUME = False

# --- Model -----------------------------------------------------------------
model = ResNet(input_channels=2).to(device)
get_model_size(model)

#ckpt_path = "/kaggle/working/pretrain.pt"
#ckpt = torch.load(ckpt_path, weights_only=False)
#model.load_state_dict(ckpt["state_dict"])

# --- Optimiser, scaler, loaders ---------------------------------------------
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD, foreach=True)
scaler = torch.amp.GradScaler(device)
train_dl, eval_dl = return_dls(train_ds, eval_ds, BATCH_SIZE, len(eval_ds))

# --- Schedule: cosine decay with warmup, stepped once per batch --------------
total_training_steps = len(train_dl) * EPOCHS
warmup_steps = int(total_training_steps * 0.05)  # e.g. 5% warmup
scheduler = get_cosine_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=warmup_steps,
    num_training_steps=total_training_steps
)

In [None]:
# Run training; a Neptune run is created here and passed in for logging.
train(
    model, 
    optimizer, 
    device,
    scaler,
    scheduler,
    train_dl, 
    eval_dl,
    EPOCHS,
    checkpoint_name,
    neptune_run=setup_neptune(),
)

In [None]:
from sklearn.model_selection import KFold


# 5-fold shuffled splitter seeded with SEED.
# NOTE(review): `dataset` is not defined anywhere in the visible notebook and
# `splits` is not used afterwards - confirm what should be split here.
kfold = KFold(n_splits=5, shuffle=True, random_state=SEED)
splits = kfold.split(dataset)

In [None]:
# Reload the best checkpoint written during training and show its epoch/score.
ckpt_path = f"/kaggle/working/{checkpoint_name}"
# map_location keeps the load working when the checkpoint was saved on a GPU
# but the current runtime only has a CPU. weights_only=False unpickles arbitrary
# objects, so only use it on checkpoints produced by this notebook.
ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)
ckpt["epoch"], ckpt["score"]

In [None]:
# Appears to be the (epoch, score) output of the checkpoint cell, pasted from
# an earlier run for reference; evaluating it has no effect.
(443, 0.4866819899481086)

In [None]:
# Prepare the test set exactly like the training inputs: derivative features,
# then standardisation with the mu/sigma computed on the training inputs.
test = load_test_data()
get_stats(test)
test = get_spectra_features(test)
test = torch.tensor(test)
test = zscore(test, mu, sigma).float()
test.shape, test.dtype, get_stats(test)

In [None]:
# Rebuild the network, restore the best checkpoint and predict the test set.
model = ResNet(input_channels=2).to(device)
model.load_state_dict(ckpt["state_dict"])
model.eval()

with torch.inference_mode():
    # Move the inputs to the same device as the model instead of hard-coding
    # CUDA, so the cell also runs on a CPU-only runtime.
    preds = model(test.to(device))

preds = preds.cpu().detach().double().numpy()
preds.shape

In [None]:
# Sanity check: print min / max / mean / std of the predictions.
get_stats(preds)

In [None]:
# Assemble the submission table: one row per prediction with a 1-based ID column.
column_names = ['Glucose', 'Sodium Acetate', 'Magnesium Sulfate']
preds_df = pd.DataFrame(preds, columns=column_names)
preds_df.insert(0, 'ID', list(range(1, len(preds_df) + 1)))
preds_df

In [None]:
# Write the submission and read it back from the SAME path. Previously the file
# was written relative to the current directory but read from /kaggle/working,
# which only matched when the working directory happened to be that folder.
name = MODEL_NAME+".finetune.transfer.in.pretrain.csv"
submission_path = f"/kaggle/working/{name}"
preds_df.to_csv(submission_path, index=False)
f = pd.read_csv(submission_path)
f