<a href="https://colab.research.google.com/github/Krankile/ensemble_forecasting/blob/main/notebooks/weight_net/2_fit_weight_net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Setup

**Note:** Data set classes expect data to be normalized

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%%capture
!pip install wandb

Go here to find wandb API key:

[https://wandb.ai/settings](https://wandb.ai/settings)

In [3]:
import wandb as wb
wb.login()


[34m[1mwandb[0m: Currently logged in as: [33mkrankile[0m (use `wandb login --relogin` to force relogin)


True

In [4]:
%%capture
!git clone https://github.com/Krankile/ensemble_forecasting.git
!mv ensemble_forecasting ef

In [5]:
%%capture
!cd ef && git pull

In [6]:
import os
import copy
import math
import random
from multiprocessing import cpu_count
from pathlib import Path
from collections import namedtuple
from functools import partial

import numpy as np
import pandas as pd

from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
import torch.nn.functional as F

from ef.models import weightnets
from ef.utils import loss_functions, activations, optimizers, schedulers, scalers

from ef.data import ensemble_loaders, ensemble_loaders_kfold

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [8]:
def artifact_to_path(run, art_name, *, root="krankile/data-processing/"):
    art = run.use_artifact(root + art_name); art.download()
    return art.file()

# Debug area

# Training loop

## Normal train-val split

In [9]:
def train_model(model, train_loader, val_loader, num_examples, conf):    
    batch_size = conf.batch_size

    optimizer = optimizers[conf.optimizer](model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay)
    scheduler = schedulers[conf.schedule](
        optimizer, conf.learning_rate,
        epochs=conf.epochs,
        steps_per_epoch=math.ceil(num_examples / batch_size),
    )

    loss_func = loss_functions[conf.loss_func]
    it = tqdm(range(1, conf.epochs+1))
    
    best_loss = float("inf")
    step = 0

    for epoch in it:

        #Each epoch has a training and validation phase
        train_losses = []
        val_losses = []
        for phase in ['train','val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                batches = train_loader
            else:
                model.eval()  # Set model to evaluate mode
                batches = val_loader
            for i, tensors in enumerate(batches):
                cats, inputs, forecasts, actuals, *loss_args = map(lambda x: x.to(device), tensors)
                optimizer.zero_grad()

                y_pred = model(cats, inputs.float()).unsqueeze(2)

                prediction = torch.matmul(forecasts, y_pred).squeeze(2)
                loss = loss_func(prediction, actuals, *loss_args)
                if phase == 'train':
                    train_losses.append(loss.item())
                    loss.backward()

                    optimizer.step()
                    scheduler.step()
                    
                    step += 1
                else:
                    val_losses.append(loss.item())

        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)

        if val_loss < best_loss: 
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())

            filepath = "model.pth"
            torch.save(best_model_wts, filepath)
            wb.save(filepath)

        wb.log({"train_loss": train_loss, "val_loss": val_loss, "epoch": epoch, "best_loss": best_loss, "n_examples":batch_size*step, "lr": optimizer.param_groups[0]["lr"]}, step=step)
        it.set_postfix({"train_loss": train_loss, "val_loss": val_loss, "best_loss": best_loss, "lr": f'{optimizer.param_groups[0]["lr"]:.2e}'})
        
    model.load_state_dict(best_model_wts)
    return model.eval()

In [10]:
def train(config=None, project=None, entity=None, enablewb=True):
    mode = "online" if enablewb else "online"
    with wb.init(config=config, project=project, entity=entity, job_type="training", mode=mode) as run:
        conf = run.config
        print(conf)

        datapath = artifact_to_path(run, conf.data)
        splitpath = artifact_to_path(run, conf.data_split)

        (
            train_loader,
            val_loader,
            emb_dims,
            num_cont,
            num_examples,
        ) = ensemble_loaders(
                    datapath=datapath, splitpath=splitpath,
                    batch_size=conf.batch_size,
                    feature_set=conf.feature_set,
                    n_models=conf.num_models,)
        
        model = weightnets[conf.architecture](
            num_cont=num_cont,
            out_size=conf.num_models,
            n_hidden=conf.n_hidden,
            hidden_dim=conf.hidden_dim,
            dropout=conf.dropout,
            bn=conf.bn,
            activation=conf.act,
            emb_dims=emb_dims,
        )

        print(f"Moving model to device: {device}")
        model = model.float().to(device)

        model = train_model(
            model,
            train_loader,
            val_loader,
            num_examples,
            conf=conf,
        )
    return model
    

## Training over all train data with final hyperparameters

In [None]:
def train_model_final(model, train_loader, num_examples, conf):    
    batch_size = conf.batch_size

    optimizer = optimizers[conf.optimizer](model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay)
    scheduler = schedulers[conf.schedule](
        optimizer, conf.learning_rate,
        epochs=conf.epochs,
        steps_per_epoch=math.ceil(num_examples / batch_size),
    )

    loss_func = loss_functions[conf.loss_func]
    it = tqdm(range(1, conf.epochs+1))
    steps = 0

    for epoch in it:

        train_losses = []
        model.train()

        for i, tensors in enumerate(train_loader):
            cats, inputs, forecasts, actuals, *loss_args = map(lambda x: x.to(device), tensors)
            optimizer.zero_grad()

            y_pred = model(cats, inputs.float()).unsqueeze(2)

            prediction = torch.matmul(forecasts, y_pred).squeeze(2)
            loss = loss_func(prediction, actuals, *loss_args)
            train_losses.append(loss.item())
            loss.backward()

            optimizer.step()
            scheduler.step()
            step += 1

        train_loss = np.mean(train_losses)

        wb.log({"train_loss": train_loss, "epoch": epoch, "n_examples":batch_size*step, "lr": optimizer.param_groups[0]["lr"]}, step=step)
        it.set_postfix({"train_loss": train_loss, "lr": f'{optimizer.param_groups[0]["lr"]:.2e}'})
        
    model.load_state_dict(best_model_wts)
    return model.eval()

In [None]:
def train_final(config=None, project=None, entity=None, enablewb=True):
    mode = "online" if enablewb else "online"
    with wb.init(config=config, project=project, entity=entity, job_type="training", mode=mode) as run:
        conf = run.config
        print(conf)

        datapath = artifact_to_path(run, conf.data)

        (
            train_loader,
            emb_dims,
            num_cont,
            num_examples,
        ) = ensemble_loaders(
                    datapath=datapath,
                    batch_size=conf.batch_size,
                    feature_set=conf.feature_set,
                    n_models=conf.num_models,)
        
        model = weightnets[conf.architecture](
            num_cont=num_cont,
            out_size=conf.num_models,
            n_hidden=conf.n_hidden,
            hidden_dim=conf.hidden_dim,
            dropout=conf.dropout,
            bn=conf.bn,
            activation=conf.act,
            emb_dims=emb_dims,
        )

        print(f"Moving model to device: {device}")
        model = model.float().to(device)

        model = train_model(
            model,
            train_loader,
            num_examples,
            conf=conf,
        )

        filepath = "model.pth"
        torch.save(model.state_dict(), filepath)
        wb.save(filepath)

        return model
    

## Train with k-fold cross-validation

In [32]:
def train_model_kfold(model, train_loader, val_loader, num_examples, conf, fold_num):    
    batch_size = conf.batch_size

    optimizer = optimizers[conf.optimizer](model.parameters(), lr=conf.learning_rate, weight_decay=conf.weight_decay)
    scheduler = schedulers[conf.schedule](
        optimizer, conf.learning_rate,
        epochs=conf.epochs,
        steps_per_epoch=math.ceil(num_examples / batch_size),
    )

    loss_func = loss_functions[conf.loss_func]
    it = tqdm(range(1, conf.epochs+1), desc=f"Fold {fold_num+1} of {conf.k*len(conf.shuffle_seeds)}")
    
    for epoch in it:

        #Each epoch has a training and validation phase
        train_losses = []
        val_losses = []
        for phase in ['train','val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                batches = train_loader
            else:
                model.eval()  # Set model to evaluate mode
                batches = val_loader
            for i, tensors in enumerate(batches):
                cats, inputs, forecasts, actuals, *loss_args = map(lambda x: x.to(device), tensors)
                optimizer.zero_grad()

                y_pred = model(cats, inputs.float()).unsqueeze(2)

                prediction = torch.matmul(forecasts, y_pred).squeeze(2)
                loss = loss_func(prediction, actuals, *loss_args)
                if phase == 'train':
                    train_losses.append(loss.item())
                    loss.backward()

                    optimizer.step()
                    scheduler.step()
                else:
                    val_losses.append(loss.item())

        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)

        wb.log({f"train_loss/{fold_num}": train_loss, f"val_loss/{fold_num}": val_loss, "epoch": epoch, "lr": optimizer.param_groups[0]['lr']})
        it.set_postfix({"train_loss": train_loss, "val_loss": val_loss, "lr": f"{optimizer.param_groups[0]['lr']:.2e}"})

    return val_loss

In [33]:
def standardize(df, scaler=None):
    feats = df.loc[:, "x_acf1":"lstm_31"]
    if scaler is None:
        scaler = StandardScaler().fit(feats)

    index, columns = feats.index, feats.columns
    df.loc[:, "x_acf1":"lstm_31"] = pd.DataFrame(scaler.transform(feats), index=index, columns=columns)

    return df, scaler

In [40]:
def train_kfold(config=None, project=None, entity=None, enablewb=True):
    mode = "online" if enablewb else "online"
    with wb.init(config=config, project=project, entity=entity, job_type="training", mode=mode) as run:
        conf = run.config
        print(conf)

        rnd_seed = np.random.randint(1e9)
        run.log({"random_seed": rnd_seed})
        datapath = artifact_to_path(run, conf.data)

        df = pd.read_feather(datapath).set_index("m4id")
        
        outer_losses = []
        for s, seed in enumerate(conf.shuffle_seeds):
            df = shuffle(df, random_state=seed)
            folds = np.array_split(df, conf.k)
            losses = []
            for i, val in enumerate(folds, start=(s*conf.k)):
                data = pd.concat(folds[:i] + folds[(i+1):], axis=0)
                data, scaler = standardize(data, scaler=None)
                val, _ = standardize(val, scaler=scaler)

                (
                    train_loader,
                    val_loader,
                    emb_dims,
                    num_cont,
                    num_examples,
                ) = ensemble_loaders_kfold(
                            data=data, val=val,
                            batch_size=conf.batch_size,
                            feature_set=conf.feature_set,
                            n_models=conf.num_models,
                            cpus=None,)

                torch.manual_seed(rnd_seed)
                model = weightnets[conf.architecture](
                    num_cont=num_cont,
                    out_size=conf.num_models,
                    n_hidden=conf.n_hidden,
                    hidden_dim=conf.hidden_dim,
                    dropout=conf.dropout,
                    bn=conf.bn,
                    activation=conf.act,
                    emb_dims=emb_dims,
                )

                print(f"Moving model to device: {device}")
                model = model.float().to(device)

                loss = train_model_kfold(
                    model,
                    train_loader,
                    val_loader,
                    num_examples,
                    conf=conf,
                    fold_num=i,
                )

                losses.append(loss)

        overall_loss = np.mean(losses)
        run.log({"overall_loss": overall_loss})

    return overall_loss

## Run config

### Normal config

In [36]:
norm_config = dict(
    epochs=20,
    hidden_dim=256,
    n_hidden=2,
    learning_rate=2e-3,
    optimizer="adamw",
    architecture="WeightNetV4",
    data="ensemble_traval:standard",
    data_split="traval_split_80_20:v0",
    batch_size=1024,
    loss_func="owa",
    dropout=0.5,
    weight_decay=0.05,
    bn=False,
    feature_set="ma",
    act="leaky",
    num_models=14,
    schedule=None,
)

### K-fold config

In [37]:
kfold_config = dict(
    k=5,
    shuffle_seeds=[69, 420, 666]
    epochs=5,
    hidden_dim=256,
    n_hidden=2,
    learning_rate=2e-3,
    optimizer="adamw",
    architecture="WeightNetV4",
    data="ensemble_traval:non-standard",
    batch_size=1024,
    loss_func="owa",
    dropout=0.5,
    weight_decay=0.05,
    bn=False,
    feature_set="ma",
    act="leaky",
    num_models=14,
    schedule=None,
)

## Start run

In [None]:
# For final training
enablewb = True
project = "weight-net-tmp"

train_func(config=norm_config, project=project, entity="krankile", enablewb=enablewb)

In [None]:
sweepid = "krankile/weight-net/4r44tbf3"
enablewb = True
project = "weight-net"
usecv = True

train_func, config = (train_kfold, kfold_config) if usecv else (train, norm_config)

if sweepid:
    count = 500 # number of runs to execute
    wb.agent(sweepid, function=partial(train_func, config=config), count=count)
else:
    res = train_func(config=config, project=project, entity="krankile", enablewb=enablewb)

[34m[1mwandb[0m: Agent Starting Run: 2h2oz37b with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.4495462534810624
[34m[1mwandb[0m: 	epochs: 11
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0022238325304523507
[34m[1mwandb[0m: 	n_hidden: 2
[34m[1mwandb[0m: 	weight_decay: 0.032496945846470844


{'batch_size': 128, 'dropout': 0.4495462534810624, 'epochs': 11, 'hidden_dim': 64, 'learning_rate': 0.0022238325304523507, 'n_hidden': 2, 'weight_decay': 0.032496945846470844, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 11/11 [01:30<00:00,  8.25s/it, train_loss=0.767, val_loss=0.775, lr=2.22e-03]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 11/11 [01:31<00:00,  8.36s/it, train_loss=0.771, val_loss=0.777, lr=2.22e-03]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 11/11 [01:29<00:00,  8.15s/it, train_loss=0.773, val_loss=0.776, lr=2.22e-03]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 11/11 [01:32<00:00,  8.44s/it, train_loss=0.773, val_loss=0.778, lr=2.22e-03]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 11/11 [01:32<00:00,  8.39s/it, train_loss=0.77, val_loss=0.824, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 11/11 [01:51<00:00, 10.14s/it, train_loss=0.767, val_loss=0.759, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 11/11 [01:51<00:00, 10.17s/it, train_loss=0.77, val_loss=0.768, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 11/11 [01:53<00:00, 10.27s/it, train_loss=0.772, val_loss=0.772, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 11/11 [01:51<00:00, 10.13s/it, train_loss=0.773, val_loss=0.773, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 11/11 [01:51<00:00, 10.16s/it, train_loss=0.773, val_loss=0.777, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 11/11 [01:48<00:00,  9.87s/it, train_loss=0.768, val_loss=0.763, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 11/11 [01:53<00:00, 10.34s/it, train_loss=0.768, val_loss=0.763, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 11/11 [01:51<00:00, 10.12s/it, train_loss=0.768, val_loss=0.767, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 11/11 [01:51<00:00, 10.17s/it, train_loss=0.77, val_loss=0.769, lr=2.22e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 11/11 [01:53<00:00, 10.28s/it, train_loss=0.77, val_loss=0.772, lr=2.22e-03]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▂▅▇▂▅█▃▆▁▄▇▂▅▁▄▇▂▅▇▂▅█▃▆▁▄█▃▆▁▄▇▂▅▇▂▅█▃▇
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▂▂▂▂▁▁▁▁▁▁
train_loss/1,█▃▂▂▂▂▁▁▁▁▁
train_loss/10,█▄▃▃▂▂▂▂▁▁▁
train_loss/11,█▄▄▃▃▂▂▂▂▁▁
train_loss/12,█▄▃▃▂▂▂▂▁▁▁
train_loss/13,█▃▃▂▂▂▂▁▁▁▁

0,1
epoch,11.0
lr,0.00222
overall_loss,0.76699
random_seed,513762933.0
train_loss/0,0.76664
train_loss/1,0.77106
train_loss/10,0.76808
train_loss/11,0.76826
train_loss/12,0.76842
train_loss/13,0.77002


[34m[1mwandb[0m: Agent Starting Run: 11ipmfka with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.5425306950557668
[34m[1mwandb[0m: 	epochs: 9
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0008020849127072686
[34m[1mwandb[0m: 	n_hidden: 3
[34m[1mwandb[0m: 	weight_decay: 0.01927442922471968


{'batch_size': 128, 'dropout': 0.5425306950557668, 'epochs': 9, 'hidden_dim': 64, 'learning_rate': 0.0008020849127072686, 'n_hidden': 3, 'weight_decay': 0.01927442922471968, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 9/9 [01:17<00:00,  8.63s/it, train_loss=0.774, val_loss=0.78, lr=8.02e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 9/9 [01:16<00:00,  8.50s/it, train_loss=0.779, val_loss=0.782, lr=8.02e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 9/9 [01:16<00:00,  8.46s/it, train_loss=0.78, val_loss=0.783, lr=8.02e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 9/9 [01:18<00:00,  8.72s/it, train_loss=0.779, val_loss=0.782, lr=8.02e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 9/9 [01:19<00:00,  8.83s/it, train_loss=0.778, val_loss=0.834, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 9/9 [01:32<00:00, 10.23s/it, train_loss=0.776, val_loss=0.769, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 9/9 [01:35<00:00, 10.60s/it, train_loss=0.781, val_loss=0.78, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 9/9 [01:34<00:00, 10.47s/it, train_loss=0.781, val_loss=0.781, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 9/9 [01:34<00:00, 10.50s/it, train_loss=0.78, val_loss=0.783, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 9/9 [01:31<00:00, 10.21s/it, train_loss=0.779, val_loss=0.782, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 9/9 [01:33<00:00, 10.33s/it, train_loss=0.771, val_loss=0.767, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 9/9 [01:35<00:00, 10.58s/it, train_loss=0.775, val_loss=0.77, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 9/9 [01:36<00:00, 10.73s/it, train_loss=0.777, val_loss=0.777, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 9/9 [01:33<00:00, 10.37s/it, train_loss=0.776, val_loss=0.775, lr=8.02e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 9/9 [01:34<00:00, 10.53s/it, train_loss=0.777, val_loss=0.778, lr=8.02e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▂▄▇▃▅█▄▅▁▅▆▂▅▁▃▆▂▄▇▃▅█▄▅▁▅█▂▅▁▃▆▂▄▇▃▅█▄▇
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▂▂▂▁▁▁▁▁
train_loss/1,█▂▂▂▂▁▁▁▁
train_loss/10,█▃▃▂▂▂▁▁▁
train_loss/11,█▃▃▂▂▂▂▁▁
train_loss/12,█▃▃▂▂▂▁▁▁
train_loss/13,█▃▃▂▂▂▁▁▁

0,1
epoch,9.0
lr,0.0008
overall_loss,0.77333
random_seed,541052347.0
train_loss/0,0.7736
train_loss/1,0.77936
train_loss/10,0.77116
train_loss/11,0.7752
train_loss/12,0.77705
train_loss/13,0.77594


[34m[1mwandb[0m: Agent Starting Run: i51e1yj3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.7005490669203649
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	hidden_dim: 192
[34m[1mwandb[0m: 	learning_rate: 0.00011567503363432715
[34m[1mwandb[0m: 	n_hidden: 1
[34m[1mwandb[0m: 	weight_decay: 0.0035087095615373605


{'batch_size': 128, 'dropout': 0.7005490669203649, 'epochs': 6, 'hidden_dim': 192, 'learning_rate': 0.00011567503363432715, 'n_hidden': 1, 'weight_decay': 0.0035087095615373605, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 6/6 [00:50<00:00,  8.40s/it, train_loss=0.798, val_loss=0.838, lr=1.16e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 6/6 [00:50<00:00,  8.49s/it, train_loss=0.822, val_loss=0.803, lr=1.16e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 6/6 [00:47<00:00,  7.98s/it, train_loss=0.824, val_loss=0.801, lr=1.16e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 6/6 [00:50<00:00,  8.40s/it, train_loss=0.82, val_loss=0.803, lr=1.16e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 6/6 [00:49<00:00,  8.25s/it, train_loss=0.804, val_loss=0.829, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 6/6 [01:00<00:00, 10.15s/it, train_loss=0.791, val_loss=0.785, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 6/6 [01:00<00:00, 10.10s/it, train_loss=0.803, val_loss=0.812, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 6/6 [01:00<00:00, 10.11s/it, train_loss=0.805, val_loss=0.795, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 6/6 [00:58<00:00,  9.73s/it, train_loss=0.805, val_loss=0.798, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 6/6 [01:00<00:00, 10.14s/it, train_loss=0.804, val_loss=0.802, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 6/6 [01:00<00:00, 10.07s/it, train_loss=0.793, val_loss=0.788, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 6/6 [00:59<00:00,  9.93s/it, train_loss=0.797, val_loss=0.791, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 6/6 [01:00<00:00, 10.17s/it, train_loss=0.796, val_loss=0.794, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 6/6 [01:01<00:00, 10.24s/it, train_loss=0.794, val_loss=0.801, lr=1.16e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 6/6 [00:59<00:00,  9.86s/it, train_loss=0.803, val_loss=0.795, lr=1.16e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▄▇▁▅█▂▅▁▄▇▂▅█▂▇▁▄█▂▅█▄▇▁▅█▂▅▁▄▇▂▅█▂▇▁▄█
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▄▃▂▁▁
train_loss/1,█▃▂▁▁▁
train_loss/10,█▃▂▁▁▁
train_loss/11,█▃▂▁▁▁
train_loss/12,█▃▂▁▁▁
train_loss/13,█▃▂▁▁▁

0,1
epoch,6.0
lr,0.00012
overall_loss,0.79371
random_seed,366594996.0
train_loss/0,0.79815
train_loss/1,0.82184
train_loss/10,0.79258
train_loss/11,0.7969
train_loss/12,0.79558
train_loss/13,0.79374


[34m[1mwandb[0m: Agent Starting Run: di41y936 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.5401790374545598
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.010849026320638867
[34m[1mwandb[0m: 	n_hidden: 4
[34m[1mwandb[0m: 	weight_decay: 0.032349410427108434


{'batch_size': 128, 'dropout': 0.5401790374545598, 'epochs': 17, 'hidden_dim': 128, 'learning_rate': 0.010849026320638867, 'n_hidden': 4, 'weight_decay': 0.032349410427108434, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 17/17 [02:30<00:00,  8.83s/it, train_loss=0.874, val_loss=0.877, lr=1.08e-02]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 17/17 [02:27<00:00,  8.68s/it, train_loss=0.901, val_loss=0.876, lr=1.08e-02]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 17/17 [02:25<00:00,  8.57s/it, train_loss=0.856, val_loss=0.856, lr=1.08e-02]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 17/17 [02:28<00:00,  8.71s/it, train_loss=1.06, val_loss=1.06, lr=1.08e-02]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 17/17 [02:26<00:00,  8.64s/it, train_loss=0.874, val_loss=0.876, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 17/17 [03:01<00:00, 10.65s/it, train_loss=1.07, val_loss=1.07, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 17/17 [02:58<00:00, 10.47s/it, train_loss=3.43, val_loss=22.9, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 17/17 [02:57<00:00, 10.44s/it, train_loss=1.09, val_loss=1.09, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 17/17 [02:57<00:00, 10.47s/it, train_loss=0.994, val_loss=0.998, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 17/17 [02:57<00:00, 10.44s/it, train_loss=0.925, val_loss=0.928, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 17/17 [02:59<00:00, 10.57s/it, train_loss=1.07, val_loss=1.07, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 17/17 [02:56<00:00, 10.36s/it, train_loss=1.06, val_loss=1.09, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 17/17 [02:58<00:00, 10.48s/it, train_loss=0.879, val_loss=0.875, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 17/17 [02:55<00:00, 10.31s/it, train_loss=0.995, val_loss=0.991, lr=1.08e-02]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 17/17 [02:59<00:00, 10.57s/it, train_loss=0.879, val_loss=0.876, lr=1.08e-02]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▄▇▂▅█▃▆▁▅▇▂▅▁▄▆▁▄█▃▅█▄▇▂▅█▃▆▁▄▇▂▅█▄▆▁▄█
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,▁▁▁▁▁▁█▇█▁▁▁▁▁▁▁▁
train_loss/1,▁▁▄████████▇▃▃▃▃▂
train_loss/10,▁▁▁▁▂▂▂▂▂▂▂▂▃▃▄██
train_loss/11,▁▂▂▂▂▂▂▂▂▂▂▁▁▁▁▂█
train_loss/12,▁▂▁▁▁▃███████████
train_loss/13,▁▁▁▁▁▁▁▁▁▄██▇▂▂▂▂

0,1
epoch,17.0
lr,0.01085
overall_loss,0.98068
random_seed,503676275.0
train_loss/0,0.87362
train_loss/1,0.90055
train_loss/10,1.07094
train_loss/11,1.058
train_loss/12,0.87909
train_loss/13,0.99464


[34m[1mwandb[0m: Agent Starting Run: pm2sp55v with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.5888886928892211
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005923001635025975
[34m[1mwandb[0m: 	n_hidden: 3
[34m[1mwandb[0m: 	weight_decay: 0.0018247296209669744


{'batch_size': 128, 'dropout': 0.5888886928892211, 'epochs': 10, 'hidden_dim': 128, 'learning_rate': 0.0005923001635025975, 'n_hidden': 3, 'weight_decay': 0.0018247296209669744, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 10/10 [01:23<00:00,  8.34s/it, train_loss=0.767, val_loss=0.776, lr=5.92e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 10/10 [01:24<00:00,  8.43s/it, train_loss=0.776, val_loss=0.781, lr=5.92e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 10/10 [01:23<00:00,  8.38s/it, train_loss=0.776, val_loss=0.781, lr=5.92e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 10/10 [01:26<00:00,  8.61s/it, train_loss=0.775, val_loss=0.781, lr=5.92e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 10/10 [01:25<00:00,  8.55s/it, train_loss=0.771, val_loss=0.834, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 10/10 [01:46<00:00, 10.65s/it, train_loss=0.767, val_loss=0.759, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 10/10 [01:44<00:00, 10.49s/it, train_loss=0.769, val_loss=0.769, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 10/10 [01:44<00:00, 10.41s/it, train_loss=0.773, val_loss=0.775, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 10/10 [01:45<00:00, 10.58s/it, train_loss=0.773, val_loss=0.774, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 10/10 [01:44<00:00, 10.43s/it, train_loss=0.773, val_loss=0.777, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 10/10 [01:47<00:00, 10.71s/it, train_loss=0.766, val_loss=0.763, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 10/10 [01:43<00:00, 10.40s/it, train_loss=0.769, val_loss=0.764, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 10/10 [01:48<00:00, 10.83s/it, train_loss=0.77, val_loss=0.769, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 10/10 [01:44<00:00, 10.42s/it, train_loss=0.772, val_loss=0.769, lr=5.92e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 10/10 [01:44<00:00, 10.41s/it, train_loss=0.772, val_loss=0.769, lr=5.92e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▆▁▅█▃▆▁▅▆▁▅█▃▆▁▅█▃▅█▃▆▁▅█▃▆▁▃▆▁▅█▃▆▁▅█
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▃▃▂▂▂▂▁▁▁
train_loss/1,█▁▁▁▁▁▁▁▁▁
train_loss/10,█▄▃▃▂▂▂▁▁▁
train_loss/11,█▄▃▃▂▂▂▁▁▁
train_loss/12,█▃▃▂▂▂▂▁▁▁
train_loss/13,█▃▃▂▂▂▁▁▁▁

0,1
epoch,10.0
lr,0.00059
overall_loss,0.7667
random_seed,257752589.0
train_loss/0,0.76693
train_loss/1,0.77593
train_loss/10,0.76588
train_loss/11,0.76877
train_loss/12,0.7702
train_loss/13,0.77175


[34m[1mwandb[0m: Agent Starting Run: s8mu310k with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0.4558714671336193
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00024053354275206288
[34m[1mwandb[0m: 	n_hidden: 3
[34m[1mwandb[0m: 	weight_decay: 0.0018449144920539072


{'batch_size': 256, 'dropout': 0.4558714671336193, 'epochs': 17, 'hidden_dim': 128, 'learning_rate': 0.00024053354275206288, 'n_hidden': 3, 'weight_decay': 0.0018449144920539072, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 17/17 [01:44<00:00,  6.13s/it, train_loss=0.772, val_loss=0.78, lr=2.41e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 17/17 [01:45<00:00,  6.22s/it, train_loss=0.779, val_loss=0.783, lr=2.41e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 17/17 [01:41<00:00,  5.99s/it, train_loss=0.78, val_loss=0.783, lr=2.41e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 17/17 [01:49<00:00,  6.41s/it, train_loss=0.78, val_loss=0.785, lr=2.41e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 17/17 [01:48<00:00,  6.39s/it, train_loss=0.776, val_loss=0.827, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 17/17 [02:10<00:00,  7.68s/it, train_loss=0.769, val_loss=0.762, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 17/17 [02:09<00:00,  7.62s/it, train_loss=0.773, val_loss=0.774, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 17/17 [02:10<00:00,  7.66s/it, train_loss=0.775, val_loss=0.776, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 17/17 [02:10<00:00,  7.70s/it, train_loss=0.775, val_loss=0.778, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 17/17 [02:15<00:00,  7.96s/it, train_loss=0.774, val_loss=0.779, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 17/17 [02:16<00:00,  8.05s/it, train_loss=0.777, val_loss=0.775, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 17/17 [02:12<00:00,  7.81s/it, train_loss=0.78, val_loss=0.778, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 17/17 [02:17<00:00,  8.10s/it, train_loss=0.781, val_loss=0.783, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 17/17 [02:16<00:00,  8.04s/it, train_loss=0.78, val_loss=0.778, lr=2.41e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 17/17 [02:18<00:00,  8.14s/it, train_loss=0.781, val_loss=0.782, lr=2.41e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▄▇▂▅█▃▆▁▅▇▂▅▁▄▆▁▄█▃▅█▄▇▂▅█▃▆▁▄▇▂▅█▄▆▁▄█
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁
train_loss/1,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/10,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/11,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/12,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/13,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,17.0
lr,0.00024
overall_loss,0.779
random_seed,48662687.0
train_loss/0,0.77211
train_loss/1,0.77942
train_loss/10,0.7767
train_loss/11,0.77995
train_loss/12,0.78095
train_loss/13,0.77998


[34m[1mwandb[0m: Agent Starting Run: n17hfs20 with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	dropout: 0.6409094123022885
[34m[1mwandb[0m: 	epochs: 6
[34m[1mwandb[0m: 	hidden_dim: 192
[34m[1mwandb[0m: 	learning_rate: 0.00020269149158930335
[34m[1mwandb[0m: 	n_hidden: 4
[34m[1mwandb[0m: 	weight_decay: 0.0026710394666937583


{'batch_size': 512, 'dropout': 0.6409094123022885, 'epochs': 6, 'hidden_dim': 192, 'learning_rate': 0.00020269149158930335, 'n_hidden': 4, 'weight_decay': 0.0026710394666937583, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 6/6 [00:35<00:00,  5.86s/it, train_loss=0.791, val_loss=0.791, lr=2.03e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 6/6 [00:35<00:00,  5.87s/it, train_loss=0.801, val_loss=0.8, lr=2.03e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 6/6 [00:35<00:00,  5.88s/it, train_loss=0.802, val_loss=0.799, lr=2.03e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 6/6 [00:35<00:00,  5.97s/it, train_loss=0.799, val_loss=0.802, lr=2.03e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 6/6 [00:34<00:00,  5.82s/it, train_loss=0.792, val_loss=0.827, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 6/6 [00:41<00:00,  6.88s/it, train_loss=0.792, val_loss=0.785, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 6/6 [00:41<00:00,  6.85s/it, train_loss=0.797, val_loss=0.796, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 6/6 [00:42<00:00,  7.03s/it, train_loss=0.797, val_loss=0.796, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 6/6 [00:41<00:00,  6.96s/it, train_loss=0.797, val_loss=0.803, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 6/6 [00:42<00:00,  7.11s/it, train_loss=0.796, val_loss=0.803, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 6/6 [00:41<00:00,  6.89s/it, train_loss=0.787, val_loss=0.782, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 6/6 [00:38<00:00,  6.43s/it, train_loss=0.79, val_loss=0.789, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 6/6 [00:37<00:00,  6.17s/it, train_loss=0.79, val_loss=0.793, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 6/6 [00:37<00:00,  6.24s/it, train_loss=0.79, val_loss=0.789, lr=2.03e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 6/6 [00:37<00:00,  6.22s/it, train_loss=0.791, val_loss=0.789, lr=2.03e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▄▇▁▅█▂▅▁▄▇▂▅█▂▇▁▄█▂▅█▄▇▁▅█▂▅▁▄▇▂▅█▂▇▁▄█
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▃▁▁▁▁
train_loss/1,█▂▁▁▁▁
train_loss/10,█▂▂▁▁▁
train_loss/11,█▂▂▁▁▁
train_loss/12,█▂▂▁▁▁
train_loss/13,█▂▂▁▁▁

0,1
epoch,6.0
lr,0.0002
overall_loss,0.7883
random_seed,836584454.0
train_loss/0,0.79116
train_loss/1,0.80073
train_loss/10,0.7866
train_loss/11,0.78998
train_loss/12,0.79039
train_loss/13,0.79011


[34m[1mwandb[0m: Agent Starting Run: ed5ql5o5 with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	dropout: 0.6662019520450536
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005048064339536675
[34m[1mwandb[0m: 	n_hidden: 3
[34m[1mwandb[0m: 	weight_decay: 0.000766144030180305


{'batch_size': 512, 'dropout': 0.6662019520450536, 'epochs': 10, 'hidden_dim': 128, 'learning_rate': 0.0005048064339536675, 'n_hidden': 3, 'weight_decay': 0.000766144030180305, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 10/10 [00:50<00:00,  5.04s/it, train_loss=0.774, val_loss=0.779, lr=5.05e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 10/10 [00:52<00:00,  5.27s/it, train_loss=0.788, val_loss=0.788, lr=5.05e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 10/10 [00:51<00:00,  5.19s/it, train_loss=0.789, val_loss=0.788, lr=5.05e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 10/10 [00:50<00:00,  5.07s/it, train_loss=0.787, val_loss=0.791, lr=5.05e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 10/10 [00:50<00:00,  5.05s/it, train_loss=0.784, val_loss=0.836, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 10/10 [01:03<00:00,  6.33s/it, train_loss=0.774, val_loss=0.768, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 10/10 [01:00<00:00,  6.09s/it, train_loss=0.779, val_loss=0.78, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 10/10 [01:01<00:00,  6.11s/it, train_loss=0.781, val_loss=0.781, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 10/10 [01:00<00:00,  6.07s/it, train_loss=0.781, val_loss=0.787, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 10/10 [01:00<00:00,  6.00s/it, train_loss=0.779, val_loss=0.784, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 10/10 [00:59<00:00,  5.92s/it, train_loss=0.773, val_loss=0.767, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 10/10 [00:58<00:00,  5.86s/it, train_loss=0.778, val_loss=0.777, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 10/10 [01:00<00:00,  6.01s/it, train_loss=0.778, val_loss=0.78, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 10/10 [00:59<00:00,  5.92s/it, train_loss=0.779, val_loss=0.776, lr=5.05e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 10/10 [00:59<00:00,  5.94s/it, train_loss=0.779, val_loss=0.778, lr=5.05e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▆▁▅█▃▆▁▅▆▁▅█▃▆▁▅█▃▅█▃▆▁▅█▃▆▁▃▆▁▅█▃▆▁▅█
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▃▂▂▂▂▁▁▁▁
train_loss/1,█▂▁▁▁▁▁▁▁▁
train_loss/10,█▃▂▂▂▂▁▁▁▁
train_loss/11,█▃▂▂▂▂▁▁▁▁
train_loss/12,█▃▂▂▂▁▁▁▁▁
train_loss/13,█▂▂▂▂▁▁▁▁▁

0,1
epoch,10.0
lr,0.0005
overall_loss,0.77536
random_seed,277850414.0
train_loss/0,0.77442
train_loss/1,0.78821
train_loss/10,0.77255
train_loss/11,0.77752
train_loss/12,0.77828
train_loss/13,0.7785


[34m[1mwandb[0m: Agent Starting Run: prdgos95 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0.7457999395056188
[34m[1mwandb[0m: 	epochs: 19
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00019750630394272184
[34m[1mwandb[0m: 	n_hidden: 2
[34m[1mwandb[0m: 	weight_decay: 0.0002412609602244394


{'batch_size': 256, 'dropout': 0.7457999395056188, 'epochs': 19, 'hidden_dim': 64, 'learning_rate': 0.00019750630394272184, 'n_hidden': 2, 'weight_decay': 0.0002412609602244394, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 19/19 [01:55<00:00,  6.10s/it, train_loss=0.782, val_loss=0.787, lr=1.98e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 19/19 [01:56<00:00,  6.15s/it, train_loss=0.793, val_loss=0.794, lr=1.98e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 19/19 [01:54<00:00,  6.04s/it, train_loss=0.794, val_loss=0.792, lr=1.98e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 19/19 [01:55<00:00,  6.06s/it, train_loss=0.791, val_loss=0.794, lr=1.98e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 19/19 [01:54<00:00,  6.00s/it, train_loss=0.788, val_loss=0.827, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 19/19 [02:21<00:00,  7.47s/it, train_loss=0.783, val_loss=0.778, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 19/19 [02:18<00:00,  7.31s/it, train_loss=0.788, val_loss=0.788, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 19/19 [02:18<00:00,  7.31s/it, train_loss=0.788, val_loss=0.788, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 19/19 [02:18<00:00,  7.29s/it, train_loss=0.789, val_loss=0.793, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 19/19 [02:19<00:00,  7.32s/it, train_loss=0.787, val_loss=0.793, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 19/19 [02:24<00:00,  7.59s/it, train_loss=0.784, val_loss=0.782, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 19/19 [02:19<00:00,  7.33s/it, train_loss=0.787, val_loss=0.786, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 19/19 [02:22<00:00,  7.52s/it, train_loss=0.788, val_loss=0.789, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 19/19 [02:30<00:00,  7.93s/it, train_loss=0.787, val_loss=0.788, lr=1.98e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 19/19 [02:25<00:00,  7.67s/it, train_loss=0.786, val_loss=0.787, lr=1.98e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▂▄▇▃▆▇▃▆▂▅▆▂▅▁▄▆▁▅█▃▅█▄▇▃▄▇▃▆▂▃▆▂▅▁▃▆▁▅█
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/1,█▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/10,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/11,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/12,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/13,█▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,19.0
lr,0.0002
overall_loss,0.78653
random_seed,819207338.0
train_loss/0,0.78196
train_loss/1,0.79294
train_loss/10,0.78446
train_loss/11,0.78745
train_loss/12,0.78815
train_loss/13,0.78742


[34m[1mwandb[0m: Agent Starting Run: 75vrwma4 with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	dropout: 0.6976958098349837
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	hidden_dim: 192
[34m[1mwandb[0m: 	learning_rate: 0.0007644261041089425
[34m[1mwandb[0m: 	n_hidden: 2
[34m[1mwandb[0m: 	weight_decay: 0.007759780501493042


{'batch_size': 256, 'dropout': 0.6976958098349837, 'epochs': 20, 'hidden_dim': 192, 'learning_rate': 0.0007644261041089425, 'n_hidden': 2, 'weight_decay': 0.007759780501493042, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 20/20 [02:14<00:00,  6.73s/it, train_loss=0.751, val_loss=0.768, lr=7.64e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 20/20 [02:11<00:00,  6.57s/it, train_loss=0.757, val_loss=0.772, lr=7.64e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 20/20 [02:14<00:00,  6.72s/it, train_loss=0.76, val_loss=0.774, lr=7.64e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 20/20 [02:17<00:00,  6.90s/it, train_loss=0.757, val_loss=0.778, lr=7.64e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 20/20 [02:19<00:00,  6.98s/it, train_loss=0.758, val_loss=0.824, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 20/20 [02:43<00:00,  8.16s/it, train_loss=0.749, val_loss=0.74, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 20/20 [02:44<00:00,  8.23s/it, train_loss=0.756, val_loss=0.755, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 20/20 [02:45<00:00,  8.30s/it, train_loss=0.756, val_loss=0.759, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 20/20 [02:42<00:00,  8.15s/it, train_loss=0.755, val_loss=0.76, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 20/20 [02:44<00:00,  8.22s/it, train_loss=0.754, val_loss=0.757, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 20/20 [02:42<00:00,  8.15s/it, train_loss=0.753, val_loss=0.749, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 20/20 [02:42<00:00,  8.14s/it, train_loss=0.757, val_loss=0.755, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 20/20 [02:39<00:00,  7.99s/it, train_loss=0.758, val_loss=0.759, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 20/20 [02:43<00:00,  8.17s/it, train_loss=0.759, val_loss=0.758, lr=7.64e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 20/20 [02:40<00:00,  8.02s/it, train_loss=0.758, val_loss=0.758, lr=7.64e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▆▁▅█▃▆▁▅▆▁▅█▃▆▁▅█▃▅█▃▆▁▅█▃▆▁▃▆▁▅█▃▆▁▅█
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
train_loss/1,█▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/10,█▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/11,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/12,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/13,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,20.0
lr,0.00076
overall_loss,0.75576
random_seed,315195991.0
train_loss/0,0.75085
train_loss/1,0.75739
train_loss/10,0.75262
train_loss/11,0.75722
train_loss/12,0.75831
train_loss/13,0.75903


[34m[1mwandb[0m: Agent Starting Run: sus12a23 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.6418573613848797
[34m[1mwandb[0m: 	epochs: 28
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00013267973604969187
[34m[1mwandb[0m: 	n_hidden: 2
[34m[1mwandb[0m: 	weight_decay: 0.013440107383436869


{'batch_size': 128, 'dropout': 0.6418573613848797, 'epochs': 28, 'hidden_dim': 256, 'learning_rate': 0.00013267973604969187, 'n_hidden': 2, 'weight_decay': 0.013440107383436869, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 28/28 [03:55<00:00,  8.42s/it, train_loss=0.759, val_loss=0.772, lr=1.33e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 28/28 [04:11<00:00,  8.99s/it, train_loss=0.768, val_loss=0.777, lr=1.33e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 28/28 [04:04<00:00,  8.73s/it, train_loss=0.768, val_loss=0.776, lr=1.33e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 28/28 [04:09<00:00,  8.92s/it, train_loss=0.767, val_loss=0.778, lr=1.33e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 28/28 [04:08<00:00,  8.88s/it, train_loss=0.764, val_loss=0.835, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 28/28 [04:54<00:00, 10.52s/it, train_loss=0.76, val_loss=0.753, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 28/28 [04:53<00:00, 10.49s/it, train_loss=0.765, val_loss=0.766, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 28/28 [04:52<00:00, 10.46s/it, train_loss=0.765, val_loss=0.767, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 28/28 [04:56<00:00, 10.58s/it, train_loss=0.765, val_loss=0.767, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 28/28 [04:58<00:00, 10.66s/it, train_loss=0.765, val_loss=0.77, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 28/28 [04:58<00:00, 10.67s/it, train_loss=0.758, val_loss=0.756, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 28/28 [04:56<00:00, 10.58s/it, train_loss=0.763, val_loss=0.762, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 28/28 [04:54<00:00, 10.51s/it, train_loss=0.763, val_loss=0.766, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 28/28 [04:57<00:00, 10.61s/it, train_loss=0.763, val_loss=0.764, lr=1.33e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 28/28 [04:51<00:00, 10.41s/it, train_loss=0.763, val_loss=0.766, lr=1.33e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▇▂▅█▃▆▁▅▇▂▆█▃▇▁▅█▂▆▁▃▇▂▅█▃▆▁▅▇▂▆█▃▇▁▅█
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
train_loss/1,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/10,█▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/11,█▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/12,█▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/13,█▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,28.0
lr,0.00013
overall_loss,0.76265
random_seed,499445768.0
train_loss/0,0.75901
train_loss/1,0.76758
train_loss/10,0.75768
train_loss/11,0.76266
train_loss/12,0.76304
train_loss/13,0.76296


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9safxeb4 with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	dropout: 0.750391715489802
[34m[1mwandb[0m: 	epochs: 23
[34m[1mwandb[0m: 	hidden_dim: 192
[34m[1mwandb[0m: 	learning_rate: 0.0029944988030013793
[34m[1mwandb[0m: 	n_hidden: 3
[34m[1mwandb[0m: 	weight_decay: 0.0007846034434177841


{'batch_size': 512, 'dropout': 0.750391715489802, 'epochs': 23, 'hidden_dim': 192, 'learning_rate': 0.0029944988030013793, 'n_hidden': 3, 'weight_decay': 0.0007846034434177841, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 23/23 [01:59<00:00,  5.21s/it, train_loss=0.732, val_loss=0.772, lr=2.99e-03]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 23/23 [02:00<00:00,  5.24s/it, train_loss=0.738, val_loss=0.774, lr=2.99e-03]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 23/23 [01:56<00:00,  5.05s/it, train_loss=0.741, val_loss=0.77, lr=2.99e-03]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 23/23 [02:02<00:00,  5.34s/it, train_loss=0.741, val_loss=0.777, lr=2.99e-03]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 23/23 [02:02<00:00,  5.33s/it, train_loss=0.73, val_loss=0.853, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 23/23 [02:29<00:00,  6.51s/it, train_loss=0.737, val_loss=0.724, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 23/23 [02:32<00:00,  6.65s/it, train_loss=0.746, val_loss=0.743, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 23/23 [02:33<00:00,  6.69s/it, train_loss=0.745, val_loss=0.747, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 23/23 [02:36<00:00,  6.80s/it, train_loss=0.745, val_loss=0.748, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 23/23 [02:36<00:00,  6.78s/it, train_loss=0.742, val_loss=0.748, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 23/23 [02:35<00:00,  6.75s/it, train_loss=0.73, val_loss=0.721, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 23/23 [02:35<00:00,  6.77s/it, train_loss=0.737, val_loss=0.734, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 23/23 [02:42<00:00,  7.08s/it, train_loss=0.739, val_loss=0.74, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 23/23 [02:40<00:00,  7.00s/it, train_loss=0.735, val_loss=0.73, lr=2.99e-03]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 23/23 [02:43<00:00,  7.11s/it, train_loss=0.764, val_loss=0.754, lr=2.99e-03]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▂▅▇▂▅▇▄▆▂▄▇▂▆▁▄▆▁▅█▃▅█▃▇▂▅▇▃▆▂▄▇▂▅▁▄▆▁▄▆
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
train_loss/1,█▆▅▅▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train_loss/10,█▆▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁
train_loss/11,█▆▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁
train_loss/12,█▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train_loss/13,█▆▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁

0,1
epoch,23.0
lr,0.00299
overall_loss,0.73567
random_seed,532718768.0
train_loss/0,0.73176
train_loss/1,0.73811
train_loss/10,0.72991
train_loss/11,0.73738
train_loss/12,0.73897
train_loss/13,0.73497


[34m[1mwandb[0m: Agent Starting Run: 67r5uq47 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.735336932978689
[34m[1mwandb[0m: 	epochs: 60
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005389725887901126
[34m[1mwandb[0m: 	n_hidden: 2
[34m[1mwandb[0m: 	weight_decay: 0.00394343008023824


{'batch_size': 128, 'dropout': 0.735336932978689, 'epochs': 60, 'hidden_dim': 64, 'learning_rate': 0.0005389725887901126, 'n_hidden': 2, 'weight_decay': 0.00394343008023824, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 60/60 [08:49<00:00,  8.82s/it, train_loss=0.743, val_loss=0.771, lr=5.39e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 60/60 [08:51<00:00,  8.86s/it, train_loss=0.752, val_loss=0.773, lr=5.39e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 60/60 [08:44<00:00,  8.74s/it, train_loss=0.754, val_loss=0.772, lr=5.39e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 60/60 [08:38<00:00,  8.65s/it, train_loss=0.751, val_loss=0.776, lr=5.39e-04]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 60/60 [08:44<00:00,  8.75s/it, train_loss=0.749, val_loss=0.935, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 60/60 [10:39<00:00, 10.65s/it, train_loss=0.745, val_loss=0.738, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 60/60 [10:30<00:00, 10.51s/it, train_loss=0.749, val_loss=0.749, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 60/60 [10:43<00:00, 10.73s/it, train_loss=0.751, val_loss=0.753, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 60/60 [10:28<00:00, 10.48s/it, train_loss=0.75, val_loss=0.754, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 60/60 [10:22<00:00, 10.38s/it, train_loss=0.748, val_loss=0.752, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 60/60 [10:24<00:00, 10.41s/it, train_loss=0.745, val_loss=0.742, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 60/60 [10:34<00:00, 10.58s/it, train_loss=0.749, val_loss=0.747, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5: 100%|██████████| 60/60 [10:24<00:00, 10.41s/it, train_loss=0.749, val_loss=0.752, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 14 of 5: 100%|██████████| 60/60 [10:30<00:00, 10.50s/it, train_loss=0.75, val_loss=0.751, lr=5.39e-04]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 15 of 5: 100%|██████████| 60/60 [10:18<00:00, 10.30s/it, train_loss=0.749, val_loss=0.75, lr=5.39e-04]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,▁▃▇▂▅█▃▅▁▄▇▃▅▁▃▆▁▅▇▃▅█▃▇▂▅█▃▅▁▄▇▃▅▇▃▆▁▅▇
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
overall_loss,▁
random_seed,▁
train_loss/0,█▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/1,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/10,█▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/11,█▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/12,█▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss/13,█▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
lr,0.00054
overall_loss,0.74854
random_seed,106932713.0
train_loss/0,0.7434
train_loss/1,0.7521
train_loss/10,0.74486
train_loss/11,0.74927
train_loss/12,0.74945
train_loss/13,0.75004


[34m[1mwandb[0m: Agent Starting Run: lgmx3aa6 with config:
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	dropout: 0.674445141536874
[34m[1mwandb[0m: 	epochs: 148
[34m[1mwandb[0m: 	hidden_dim: 192
[34m[1mwandb[0m: 	learning_rate: 5.644574410837081e-05
[34m[1mwandb[0m: 	n_hidden: 4
[34m[1mwandb[0m: 	weight_decay: 0.0001309226140893589


{'batch_size': 1024, 'dropout': 0.674445141536874, 'epochs': 148, 'hidden_dim': 192, 'learning_rate': 5.644574410837081e-05, 'n_hidden': 4, 'weight_decay': 0.0001309226140893589, 'k': 5, 'optimizer': 'adamw', 'architecture': 'WeightNetV4', 'data': 'ensemble_traval:non-standard', 'data_split': 'traval_split_80_20:v0', 'loss_func': 'owa', 'bn': False, 'feature_set': 'ma', 'act': 'leaky', 'num_models': 14, 'schedule': None}


[34m[1mwandb[0m: Downloading large artifact ensemble_traval:non-standard, 119.61MB. 1 files... Done. 0:0:0


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 1 of 5: 100%|██████████| 148/148 [12:57<00:00,  5.26s/it, train_loss=0.764, val_loss=0.779, lr=5.64e-05]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 2 of 5: 100%|██████████| 148/148 [13:51<00:00,  5.62s/it, train_loss=0.771, val_loss=0.781, lr=5.64e-05]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 3 of 5: 100%|██████████| 148/148 [14:05<00:00,  5.71s/it, train_loss=0.773, val_loss=0.781, lr=5.64e-05]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 4 of 5: 100%|██████████| 148/148 [14:16<00:00,  5.79s/it, train_loss=0.773, val_loss=0.783, lr=5.64e-05]


CPU count: 2
Loaded df of shape (79996, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 5 of 5: 100%|██████████| 148/148 [13:56<00:00,  5.65s/it, train_loss=0.771, val_loss=0.846, lr=5.64e-05]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 6 of 5: 100%|██████████| 148/148 [16:43<00:00,  6.78s/it, train_loss=0.761, val_loss=0.755, lr=5.64e-05]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 7 of 5: 100%|██████████| 148/148 [16:04<00:00,  6.52s/it, train_loss=0.768, val_loss=0.769, lr=5.64e-05]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 8 of 5: 100%|██████████| 148/148 [16:13<00:00,  6.57s/it, train_loss=0.769, val_loss=0.771, lr=5.64e-05]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 9 of 5: 100%|██████████| 148/148 [15:43<00:00,  6.37s/it, train_loss=0.769, val_loss=0.772, lr=5.64e-05]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 10 of 5: 100%|██████████| 148/148 [15:00<00:00,  6.08s/it, train_loss=0.768, val_loss=0.775, lr=5.64e-05]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 11 of 5: 100%|██████████| 148/148 [15:04<00:00,  6.11s/it, train_loss=0.763, val_loss=0.763, lr=5.64e-05]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 12 of 5: 100%|██████████| 148/148 [15:22<00:00,  6.23s/it, train_loss=0.768, val_loss=0.765, lr=5.64e-05]


CPU count: 2
Loaded df of shape (99995, 801)
Loaded df of shape (19999, 801)
Moving model to device: cuda


Fold 13 of 5:  60%|██████    | 89/148 [09:11<06:12,  6.32s/it, train_loss=0.779, val_loss=0.783, lr=5.64e-05]