In [1]:
from torch.utils.data import DataLoader

from tqdm.notebook import tqdm
import wandb

from WindModel import *

# Authenticate this kernel with Weights & Biases before any run is created.
# As the last expression of the cell, the call's return value is what the notebook displays.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mbhavye-mathur[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# Class-level setup for WindDataset (imported via `from WindModel import *`).
# NOTE(review): presumably this loads the raw data and 0.1 is a hold-out fraction —
# confirm against WindModel and consider naming the constant.
WindDataset.init(0.1)

# One dataset object per split. The training helpers below read `.x`, `.y` and `len()`
# from `train` and `validation`; `test` is not used by the cells visible here.
train = WindDataset("train")
validation = WindDataset("validation")
test = WindDataset("test")

# Remove the class attribute `data` once the splits are built.
# NOTE(review): presumably done to release the raw data's memory — this also means the
# cell cannot be re-run without calling WindDataset.init() again.
del WindDataset.data

In [4]:
# Width of the first Linear layer's input; main() passes it to get_dense_model().
INPUT_SIZE = 15
# Width of the final Linear layer's output (predictions are squeezed before the loss).
OUTPUT_SIZE = 1

# Loss *class* (not an instance); main() instantiates it as the training criterion.
LOSS_FUNC = torch.nn.MSELoss

In [5]:
def get_dense_model(input_size: int,
                    hidden_sizes: list[int],
                    output_size: int,
                    activation_func: callable):
    """Assemble a fully connected network as a ``torch.nn.Sequential``.

    Every hidden width contributes a ``Linear`` layer followed by a freshly constructed
    activation module; a final ``Linear`` layer (with no activation after it) maps to
    ``output_size``.

    Args:
        input_size: number of input features of the first ``Linear`` layer.
        hidden_sizes: output width of each hidden ``Linear`` layer, in order. May be
            empty, in which case the model is a single ``Linear`` layer.
        output_size: number of output features of the last ``Linear`` layer.
        activation_func: zero-argument callable (e.g. ``torch.nn.ReLU``) returning a new
            activation module; it is called once per hidden layer.

    Returns:
        The ``torch.nn.Sequential`` holding the layers in forward order.
    """
    # Consecutive pairs of this list are the (in_features, out_features) of each hidden layer.
    widths = [input_size, *hidden_sizes]

    modules = []
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        modules.extend((torch.nn.Linear(fan_in, fan_out), activation_func()))

    # Output head: fed by the last hidden width, or by the raw input when there are no hidden layers.
    modules.append(torch.nn.Linear(widths[-1], output_size))

    return torch.nn.Sequential(*modules)

In [6]:
def evaluate_one_epoch(model, epoch):
    """Score ``model`` on the full validation split and log the metrics to wandb.

    The whole of ``validation.x`` goes through the model in a single forward pass with
    gradient tracking disabled. The mean-squared and mean-absolute errors against
    ``validation.y`` are then logged as ``val_rmse`` (square root of the MSE) and
    ``val_mae``, each multiplied by ``stds[VARIABLE]``.

    Args:
        model: callable applied to ``validation.x``; its output is squeezed before
            being compared with ``validation.y``.
        epoch: not used in the body.

    Relies on the notebook-level names ``validation``, ``stds``, ``VARIABLE`` and ``wandb``.
    """
    with torch.no_grad():
        predicted = model(validation.x).squeeze()
        val_mse = torch.nn.functional.mse_loss(predicted, validation.y)
        val_mae = torch.nn.functional.l1_loss(predicted, validation.y)

    # NOTE(review): presumably stds[VARIABLE] is the target's standard deviation, used to
    # report errors in original units — confirm in WindModel.
    scale = stds[VARIABLE]

    wandb.log({"val_rmse": (val_mse ** 0.5) * scale,
               "val_mae": val_mae * scale})


def train_one_batch(model, optimizer, criterion, batch, batch_idx):
    """Perform one optimisation step on a single mini-batch.

    Gradients are cleared, the batch is passed through ``model``, ``criterion`` is
    evaluated on the squeezed predictions, and the optimizer takes one step. On every
    100th batch (batch 0 excluded) the loss and a scaled RMSE of the same predictions
    are logged to wandb as ``train_loss`` and ``train_rmse``.

    Args:
        model: network being trained.
        optimizer: optimizer holding ``model``'s parameters.
        criterion: loss callable invoked as ``criterion(prediction, targets)``.
        batch: ``(inputs, targets)`` pair.
        batch_idx: position of the batch within the epoch; decides whether to log.
    """
    optimizer.zero_grad()

    features, labels = batch
    output = model(features).squeeze()

    loss = criterion(output, labels)
    loss.backward()
    optimizer.step()

    # Only every 100th batch is logged, and the very first batch never is.
    is_logging_step = batch_idx != 0 and batch_idx % 100 == 0
    if not is_logging_step:
        return

    with torch.no_grad():
        # The RMSE is always derived from the MSE, whatever `criterion` is; it is then
        # multiplied by stds[VARIABLE] (a notebook-level lookup).
        batch_mse = torch.nn.functional.mse_loss(output, labels)
        rmse = (batch_mse ** 0.5) * stds[VARIABLE]
        wandb.log({"train_loss": loss,
                   "train_rmse": rmse})


def train_one_epoch(model, optimizer, criterion, epoch, batch_size):
    """Run one pass over the training split in contiguous mini-batches.

    ``train.x`` and ``train.y`` are sliced in order (no shuffling) into chunks of
    ``batch_size`` rows, and each chunk is handed to ``train_one_batch`` together with
    its index within the epoch. The last chunk may be shorter than ``batch_size``.

    Args:
        model: network being trained.
        optimizer: optimizer holding ``model``'s parameters.
        criterion: loss callable forwarded to ``train_one_batch``.
        epoch: not used in the body.
        batch_size: maximum number of rows per mini-batch.

    Relies on the notebook-level names ``train`` and ``train_one_batch``.
    """
    n = len(train)

    # Walk over start offsets rather than `n // batch_size` whole batches, so that the
    # trailing `n % batch_size` rows are trained on as well (and a split smaller than
    # one batch still yields a single batch instead of none).
    # NOTE(review): a final batch of exactly one row makes `.squeeze()` in
    # train_one_batch return a 0-d prediction — confirm the loss broadcast is acceptable.
    for batch_idx, lower_i in enumerate(range(0, n, batch_size)):
        upper_i = min(lower_i + batch_size, n)

        batch_x = train.x[lower_i: upper_i]
        batch_y = train.y[lower_i: upper_i]

        train_one_batch(model, optimizer, criterion, (batch_x, batch_y), batch_idx)


def main():
    """Train and validate one dense model using the hyperparameters in ``wandb.config``.

    Config keys read: ``learning_rate``, ``batch_size``, ``layers`` (hidden layer
    widths), ``epochs``, ``activation`` (name of a class in ``torch.nn``),
    ``lr_scheduler`` (``None`` or ``"StepLR"``) and ``lr_scheduler_kwargs`` (keyword
    arguments for the scheduler; only read when a scheduler is selected).

    Each epoch logs ``epoch``, trains with ``train_one_epoch``, evaluates with
    ``evaluate_one_epoch`` and, if a scheduler is configured, steps it and logs ``lr``.
    The function takes no arguments so it can be passed directly to ``wandb.agent``.

    Raises:
        ValueError: if ``lr_scheduler`` is set to anything other than ``None`` or ``"StepLR"``.
    """
    # NOTE(review): for a manual run the caller has already called wandb.init(config=...);
    # confirm the installed wandb version reuses that active run here rather than replacing it.
    wandb.init()

    learning_rate = wandb.config.learning_rate
    batch_size = wandb.config.batch_size
    layers = wandb.config.layers
    epochs = wandb.config.epochs
    activation = wandb.config.activation

    # Resolve the activation name (e.g. "PReLU") to the corresponding torch.nn class.
    activation = getattr(torch.nn, activation)

    # train_dl = DataLoader(train, batch_size=batch_size, shuffle=True, pin_memory=True)
    # validation_dl = DataLoader(validation, batch_size=batch_size, shuffle=False, pin_memory=True)

    model = get_dense_model(INPUT_SIZE, layers, OUTPUT_SIZE, activation)
    model = model.to(DEVICE)
    print(model)

    criterion = LOSS_FUNC()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    scheduler_name = wandb.config.lr_scheduler
    if scheduler_name is None:
        scheduler = None
    elif scheduler_name == "StepLR":
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, **wandb.config.lr_scheduler_kwargs)
    else:
        # Fail fast with a clear message: an unrecognised name would otherwise be kept as
        # a plain string and only blow up at the first `scheduler.step()` call.
        raise ValueError(f"Unsupported lr_scheduler {scheduler_name!r}; expected None or 'StepLR'")

    # wandb.watch(model, log_freq=100)

    for ep in tqdm(range(epochs)):
        print(ep, end=" ")

        wandb.log({"epoch": ep})

        model.train()
        train_one_epoch(model, optimizer, criterion, ep, batch_size)

        model.eval()
        evaluate_one_epoch(model, ep)

        if scheduler is not None:
            # Step once per epoch, then record the learning rate now in effect.
            scheduler.step()
            wandb.log({"lr": scheduler.get_last_lr()[-1]})

In [None]:
# Hyperparameters for a single hand-configured training run (as opposed to a sweep).
# main() reads batch_size, learning_rate, lr_scheduler, lr_scheduler_kwargs, layers,
# activation and epochs; estimate_quantile and dataset are not read by main() and are
# only stored with the run's config.
config = {
    "batch_size": 65536,
    "learning_rate": 0.0005,
    "lr_scheduler": "StepLR",
    "lr_scheduler_kwargs": {"step_size": 16, "gamma": 0.3},
    "layers": [512, 256],
    "activation": "PReLU",
    "estimate_quantile": ESTIMATE_QUANTILE,
    "dataset": DATASET,
    "epochs": 50,
}

# Create the run here so that the project, local directory and config are attached to it
# before main() starts reading wandb.config.
wandb.init(project=f"MERRA2-{VARIABLE}-July2023", dir="wandb-local", config=config)

try:
    main()
finally:
    # Close the run even when training raises or the cell is interrupted, so that a
    # half-finished run is not left active in the kernel.
    wandb.finish()

In [65]:
# Manual cleanup cell: closes the wandb run that is currently active in this kernel.
# NOTE(review): presumably kept for use after an interrupted or failed training cell.
wandb.finish()

In [None]:
# Hyperparameter sweep definition passed to wandb.sweep().
sweep_configuration = {
    # Bayesian search over the parameters below.
    "method": "bayes",
    "name": f"sweep-{DATASET}",
    # Objective: the "val_rmse" value that evaluate_one_epoch() logs each epoch.
    "metric": {
        "goal": "minimize",
        "name": "val_rmse"
    },
    "parameters": {
        "batch_size": {"values": [8192, 16384, 32768, 65536]},
        # Only bounds are given; the sampling distribution is left to wandb's default.
        "learning_rate": {"max": 0.001, "min": 0.00005},
        # None means main() trains with a constant learning rate.
        "lr_scheduler": {"values": [None, "StepLR"]},
        # Nested parameters forwarded as keyword arguments to StepLR. They are sampled
        # for every run, but main() only reads them when lr_scheduler == "StepLR".
        "lr_scheduler_kwargs": {"parameters": {"step_size": {"max": 20, "min": 10},
                                               "gamma": {"max": 0.75, "min": 0.25}}},
        # Candidate hidden-layer widths handed to get_dense_model().
        "layers": {"values": [(256, 128), (512, 256), (1024, 512),
                              (1024, 512, 256), (256, 256, 32), (512, 256, 128), (128, 64, 32)]},
        "epochs": {"value": 40},
        # Names of torch.nn classes; main() resolves them with getattr(torch.nn, name).
        "activation": {"values": ["ReLU", "PReLU", "LeakyReLU", "ELU", "Softplus"]},
        # Fixed values that main() does not read; stored with each run's config.
        "estimate_quantile": {"value": ESTIMATE_QUANTILE},
        "dataset": {"value": DATASET},
    },
    # Hyperband early stopping.
    # NOTE(review): this is presumably why some runs in the saved output end well before
    # epoch 39 — confirm how wandb counts `min_iter` against the logging calls in main().
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 3,
        "eta": 2
    }
}

# Register the sweep, then run an agent in this kernel that calls main() once per trial.
# NOTE(review): no `count` is passed to wandb.agent, so the agent presumably keeps
# requesting trials until it is stopped manually — consider setting one.
sweep_id = wandb.sweep(sweep=sweep_configuration, project=f"MERRA2-{VARIABLE}-July2023")
wandb.agent(sweep_id, function=main)

Create sweep with ID: 4zc8713b
Sweep URL: https://wandb.ai/bhavye-mathur/MERRA2-U-July2023/sweeps/4zc8713b


[34m[1mwandb[0m: Agent Starting Run: 3qe5o7u9 with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [1024, 512]
[34m[1mwandb[0m: 	learning_rate: 0.00029286351315302873
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.4672663126655401, 'step_size': 14}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01675339513264286, max=1.0)…

Sequential(
  (0): Linear(in_features=15, out_features=1024, bias=True)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=1024, out_features=512, bias=True)
  (3): LeakyReLU(negative_slope=0.01)
  (4): Linear(in_features=512, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▅▄▃▅▅▅▆▆▆▆▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▁▁▁▁▁▁▃
train_rmse,█▅▄▃▅▅▅▆▆▆▆▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▁▁▁▁▁▁▃
val_mae,█▆▄▅▅▅▅▄▄▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▂▂▂▂▂▂▁▁▁▁▁
val_rmse,█▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,39.0
train_loss,0.00171
train_rmse,0.4063
val_mae,0.28509
val_rmse,0.40057


[34m[1mwandb[0m: Agent Starting Run: un7i9mey with config:
[34m[1mwandb[0m: 	activation: PReLU
[34m[1mwandb[0m: 	batch_size: 65536
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [1024, 512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.0006919566436500085
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.6240807684379402, 'step_size': 16}


Sequential(
  (0): Linear(in_features=15, out_features=1024, bias=True)
  (1): PReLU(num_parameters=1)
  (2): Linear(in_features=1024, out_features=512, bias=True)
  (3): PReLU(num_parameters=1)
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): PReLU(num_parameters=1)
  (6): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████████████▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▁▁▁▁▁▁▁▁▁
train_loss,█▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▃▃▃▃▃▄▅▅▁▁▁▁▁▁▁▂
train_rmse,█▅▄▄▃▃▃▃▂▂▃▂▂▂▂▂▂▂▂▂▂▂▁▂▃▃▃▃▃▄▅▅▁▁▁▁▁▁▁▂
val_mae,▇▅▄▃▃▃▂▂▂▂▂▂▂▂█▇▁▁▁▂▁▂▂▁▂▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁
val_rmse,█▅▄▄▃▃▃▃▂▂▂▂▂▂█▇▂▂▂▂▁▂▂▁▂▂▁▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,0.00027
train_loss,0.00167
train_rmse,0.40221
val_mae,0.28535
val_rmse,0.40283


[34m[1mwandb[0m: Agent Starting Run: ovqhfxyy with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.00047758850114439506
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.6578316805180544, 'step_size': 17}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016753114583358788, max=1.0…

Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): ReLU()
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▄▅▇█
train_loss,█▄▃▃▂▂▂▂▂▂▁▂▁▂▂▂▁▂▂▂▁▂▂▂
train_rmse,█▄▃▃▂▂▂▂▂▂▁▂▁▂▂▂▁▂▂▂▁▂▂▂
val_mae,█▅▃▂▁▁
val_rmse,█▅▃▂▁▁

0,1
epoch,5.0
train_loss,0.00176
train_rmse,0.41288
val_mae,0.29275
val_rmse,0.41117


[34m[1mwandb[0m: Agent Starting Run: 9xm9pi4p with config:
[34m[1mwandb[0m: 	activation: ELU
[34m[1mwandb[0m: 	batch_size: 8192
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.00026933664754704724
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.6549843224982597, 'step_size': 19}


Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): ELU(alpha=1.0)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): ELU(alpha=1.0)
  (4): Linear(in_features=256, out_features=128, bias=True)
  (5): ELU(alpha=1.0)
  (6): Linear(in_features=128, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,██████████████████▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▁▁▁
train_loss,█▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,▅▂▃█▇▅▃▂▂▂▂▂▂▂▂▃▂▂▂▁▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,▇▃▃█▇▄▃▂▂▂▂▂▂▂▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,0.00012
train_loss,0.00167
train_rmse,0.40172
val_mae,0.28507
val_rmse,0.40271


[34m[1mwandb[0m: Agent Starting Run: nxlk4s8e with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	batch_size: 32768
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [1024, 512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.0006561164426897387
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.3960938363331534, 'step_size': 12}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01675203403380389, max=1.0)…

Sequential(
  (0): Linear(in_features=15, out_features=1024, bias=True)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=1024, out_features=512, bias=True)
  (3): LeakyReLU(negative_slope=0.01)
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): LeakyReLU(negative_slope=0.01)
  (6): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▅▆▇█
lr,▁▁▁▁▁▁
train_loss,█▆▄▃▃▂▂▃▃▁▂▂▃
train_rmse,█▆▄▃▃▂▂▃▃▁▂▂▃
val_mae,█▃▇▁▂▃
val_rmse,█▄▇▁▂▃

0,1
epoch,6.0
lr,0.00066
train_loss,0.00173
train_rmse,0.40879
val_mae,0.29994
val_rmse,0.41759


[34m[1mwandb[0m: Agent Starting Run: 91kbw68j with config:
[34m[1mwandb[0m: 	activation: PReLU
[34m[1mwandb[0m: 	batch_size: 32768
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [128, 64, 32]
[34m[1mwandb[0m: 	learning_rate: 0.000795640720244075
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.28090896013334304, 'step_size': 15}


Sequential(
  (0): Linear(in_features=15, out_features=128, bias=True)
  (1): PReLU(num_parameters=1)
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): PReLU(num_parameters=1)
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): PReLU(num_parameters=1)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.312405…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▅▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▅▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
train_loss,0.00164
train_rmse,0.39808
val_mae,0.28466
val_rmse,0.40232


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bh7136s0 with config:
[34m[1mwandb[0m: 	activation: ELU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.0006108562405785679
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.3836338687733365, 'step_size': 16}


Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): ELU(alpha=1.0)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): ELU(alpha=1.0)
  (4): Linear(in_features=256, out_features=128, bias=True)
  (5): ELU(alpha=1.0)
  (6): Linear(in_features=128, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████████████▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁
train_loss,█▁▁▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▁▂▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▄▃▅▄▃▃▃▂▃▃▂▃▂▂▃▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂
val_rmse,█▄▃▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,9e-05
train_loss,0.00167
train_rmse,0.40227
val_mae,0.28549
val_rmse,0.40292


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: r9kxdneb with config:
[34m[1mwandb[0m: 	activation: Softplus
[34m[1mwandb[0m: 	batch_size: 65536
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.0005030230551648749
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.6638854532419923, 'step_size': 10}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016751940284545224, max=1.0…

Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): Softplus(beta=1, threshold=20)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): Softplus(beta=1, threshold=20)
  (4): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,█████████▅▅▅▅▅▅▅▅▅▅▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁
train_loss,▇▂▂▁▁▁▁▁▁▆▁▁▁▁▁▁▁█▂▂▁▁▁▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁▂▁
train_rmse,▇▂▂▁▁▁▁▁▂▆▁▁▁▁▁▁▁█▂▂▁▁▁▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁▂▁
val_mae,▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,0.0001
train_loss,0.0017
train_rmse,0.40539
val_mae,0.28484
val_rmse,0.40392


[34m[1mwandb[0m: Agent Starting Run: jm5zxf84 with config:
[34m[1mwandb[0m: 	activation: Softplus
[34m[1mwandb[0m: 	batch_size: 32768
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [256, 256, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0008786687329154408
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.6374985225449903, 'step_size': 11}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01675450000135849, max=1.0)…

Sequential(
  (0): Linear(in_features=15, out_features=256, bias=True)
  (1): Softplus(beta=1, threshold=20)
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): Softplus(beta=1, threshold=20)
  (4): Linear(in_features=256, out_features=32, bias=True)
  (5): Softplus(beta=1, threshold=20)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,██████████▅▅▅▅▅▅▅▅▅▅▅▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
train_loss,▂▂█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,▂▂█▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,▃▆▄▆▂█▂▂▂▂▃▁▁▁▃▆▆▅▅▅▅▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,▃▆▅▇▃█▂▂▂▂▃▁▁▁▃▅▅▅▅▅▅▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,0.00023
train_loss,0.00167
train_rmse,0.40193
val_mae,0.28739
val_rmse,0.40616


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7xqa7aqy with config:
[34m[1mwandb[0m: 	activation: ELU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [128, 64, 32]
[34m[1mwandb[0m: 	learning_rate: 0.000482002785861538
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.6840544364795866, 'step_size': 14}


Sequential(
  (0): Linear(in_features=15, out_features=128, bias=True)
  (1): ELU(alpha=1.0)
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ELU(alpha=1.0)
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): ELU(alpha=1.0)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,█████████████▄▄▄▄▄▄▄▄▄▄▄▄▄▄▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,0.00023
train_loss,0.00168
train_rmse,0.40347
val_mae,0.28444
val_rmse,0.40245


[34m[1mwandb[0m: Agent Starting Run: tufv20v6 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 65536
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [128, 64, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0003595714222954297
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.299082484554385, 'step_size': 15}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016752447217004373, max=1.0…

Sequential(
  (0): Linear(in_features=15, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): ReLU()
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,██████████████▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,3e-05
train_loss,0.00179
train_rmse,0.41554
val_mae,0.29828
val_rmse,0.41781


[34m[1mwandb[0m: Agent Starting Run: y64122j3 with config:
[34m[1mwandb[0m: 	activation: Softplus
[34m[1mwandb[0m: 	batch_size: 32768
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.000150138230662286
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.4726081524550684, 'step_size': 13}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01675158195042362, max=1.0)…

Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): Softplus(beta=1, threshold=20)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): Softplus(beta=1, threshold=20)
  (4): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,████████████▄▄▄▄▄▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▄▂▂▂▁▁▁▁▁▂▄▂▁▁▁▁▁▁▂▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▄▂▂▂▂▁▁▁▁▂▃▂▁▁▁▁▁▁▁▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,2e-05
train_loss,0.00165
train_rmse,0.39888
val_mae,0.28402
val_rmse,0.40312


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l4wtblmw with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 8192
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [128, 64, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0005539775542308048
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.30799297078466564, 'step_size': 13}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01672521389943237, max=1.0)…

Sequential(
  (0): Linear(in_features=15, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): ReLU()
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▄▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▅▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▅▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
train_loss,0.00166
train_rmse,0.40034
val_mae,0.2843
val_rmse,0.40032


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ca732694 with config:
[34m[1mwandb[0m: 	activation: Softplus
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.00042580943394096137
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.4528745679420356, 'step_size': 11}


Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): Softplus(beta=1, threshold=20)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): Softplus(beta=1, threshold=20)
  (4): Linear(in_features=256, out_features=128, bias=True)
  (5): Softplus(beta=1, threshold=20)
  (6): Linear(in_features=128, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▅▃█▄▇▃▆▇▅▄▄▂▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▃▃▃▃▃▁
train_rmse,▅▃█▅▇▃▆▇▅▄▄▂▂▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▄▄▄▄▄▄▄▄▃▃▃▁
val_mae,█▂▃▂▁▄▂▆▄▁▁▂▁▂▂▁▁▁▁▁▁▁▁▁▁▂▂▃▃▃▃▂▂▂▂▂▂▂▂▂
val_rmse,█▂▃▂▁▄▂▅▄▁▁▂▁▂▂▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂

0,1
epoch,39.0
train_loss,0.00169
train_rmse,0.4044
val_mae,0.2881
val_rmse,0.40673


[34m[1mwandb[0m: Agent Starting Run: r4aryhat with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	batch_size: 65536
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [256, 256, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0006419165500190383
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.44506117433333825, 'step_size': 18}


Sequential(
  (0): Linear(in_features=15, out_features=256, bias=True)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): LeakyReLU(negative_slope=0.01)
  (4): Linear(in_features=256, out_features=32, bias=True)
  (5): LeakyReLU(negative_slope=0.01)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 

VBox(children=(Label(value='0.001 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.279427…

0,1
epoch,▁▂▃▃▄▅▆▆▇█
lr,▁▁▁▁▁▁▁▁▁
train_loss,█▄▃▂▂▂▂▁▂
train_rmse,█▄▃▂▂▂▂▁▂
val_mae,█▄▃▂▂▁▁▃▁
val_rmse,█▄▃▂▂▁▁▃▁

0,1
epoch,9.0
lr,0.00064
train_loss,0.0019
train_rmse,0.42816
val_mae,0.30514
val_rmse,0.42445


[34m[1mwandb[0m: Agent Starting Run: nfp0qrey with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 65536
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.0002923243896446955
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.4682702852380192, 'step_size': 10}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016753114583358788, max=1.0…

Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): ReLU()
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▃▄▅▆▇█
lr,▁▁▁▁▁▁▁
train_loss,█▃▂▂▁▁▁
train_rmse,█▃▂▂▁▁▁
val_mae,█▄▂▂▂▁▁
val_rmse,█▃▂▂▁▁▁

0,1
epoch,7.0
lr,0.00029
train_loss,0.00182
train_rmse,0.41984
val_mae,0.30109
val_rmse,0.42065


[34m[1mwandb[0m: Agent Starting Run: azuuqgy3 with config:
[34m[1mwandb[0m: 	activation: PReLU
[34m[1mwandb[0m: 	batch_size: 32768
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.00024487771261780884
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.3821690991327999, 'step_size': 14}


Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): PReLU(num_parameters=1)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): PReLU(num_parameters=1)
  (4): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
train_rmse,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
val_mae,█▅▄▃▃▃▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
train_loss,0.00163
train_rmse,0.39655
val_mae,0.28571
val_rmse,0.40309


[34m[1mwandb[0m: Agent Starting Run: 4ahggvr0 with config:
[34m[1mwandb[0m: 	activation: ELU
[34m[1mwandb[0m: 	batch_size: 32768
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [1024, 512]
[34m[1mwandb[0m: 	learning_rate: 0.0003217930021977126
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.4575602205633496, 'step_size': 12}


Sequential(
  (0): Linear(in_features=15, out_features=1024, bias=True)
  (1): ELU(alpha=1.0)
  (2): Linear(in_features=1024, out_features=512, bias=True)
  (3): ELU(alpha=1.0)
  (4): Linear(in_features=512, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
lr,▁▁▁▁
train_loss,█▆▄▃▂▂▁▁
train_rmse,█▆▄▃▂▂▁▁
val_mae,█▄▂▁
val_rmse,█▄▂▁

0,1
epoch,4.0
lr,0.00032
train_loss,0.00174
train_rmse,0.40971
val_mae,0.29058
val_rmse,0.41273


[34m[1mwandb[0m: Agent Starting Run: h18qoj81 with config:
[34m[1mwandb[0m: 	activation: Softplus
[34m[1mwandb[0m: 	batch_size: 32768
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.0005786151982267778
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.2894442525796056, 'step_size': 20}


Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): Softplus(beta=1, threshold=20)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): Softplus(beta=1, threshold=20)
  (4): Linear(in_features=256, out_features=128, bias=True)
  (5): Softplus(beta=1, threshold=20)
  (6): Linear(in_features=128, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 

0,1
epoch,▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
train_loss,█▂▂▂▁▁▁▁▄▁▁▆▁▁▂▁▁▁▄▁▁▁▃▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▂▂▂▂▁▁▄▁▁▆▁▂▃▁▁▁▄▁▁▁▄▁▁▁▁▁▁▃▁▁▁▂▁▁▁▁▁▁
val_mae,▃▂▄▂▁▄▂▇▃█▃▇▅▆▄▆▅▄▅▃▆
val_rmse,▄▂▅▂▁▄▁▇▃█▃▇▅▆▄▆▅▄▅▃▆

0,1
epoch,21.0
train_loss,0.00169
train_rmse,0.40405
val_mae,0.3331
val_rmse,0.44949


[34m[1mwandb[0m: Agent Starting Run: un587tju with config:
[34m[1mwandb[0m: 	activation: PReLU
[34m[1mwandb[0m: 	batch_size: 8192
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [128, 64, 32]
[34m[1mwandb[0m: 	learning_rate: 9.815487000458688e-05
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.4263014675003328, 'step_size': 13}


Sequential(
  (0): Linear(in_features=15, out_features=128, bias=True)
  (1): PReLU(num_parameters=1)
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): PReLU(num_parameters=1)
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): PReLU(num_parameters=1)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.005 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.210665…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,████████████▄▄▄▄▄▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,1e-05
train_loss,0.00168
train_rmse,0.40296
val_mae,0.28534
val_rmse,0.40333


[34m[1mwandb[0m: Agent Starting Run: 9wwpqnbi with config:
[34m[1mwandb[0m: 	activation: Softplus
[34m[1mwandb[0m: 	batch_size: 8192
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [1024, 512]
[34m[1mwandb[0m: 	learning_rate: 0.0002537519193903631
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.4950834470693452, 'step_size': 16}


Sequential(
  (0): Linear(in_features=15, out_features=1024, bias=True)
  (1): Softplus(beta=1, threshold=20)
  (2): Linear(in_features=1024, out_features=512, bias=True)
  (3): Softplus(beta=1, threshold=20)
  (4): Linear(in_features=512, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,▁█▁▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,▂▂▁▁▆▅▁█▅▅▂▁▁▁▁▂▂▂▂▃▁▁▁▁▂▂▂▃▃▃▃▄▄▄▃▃▂▂▁▁
val_rmse,▂▂▁▁▆▅▂█▅▅▂▁▁▁▁▂▂▂▂▃▁▁▁▁▂▂▂▃▃▃▃▄▄▃▃▃▂▂▁▁

0,1
epoch,39.0
train_loss,0.00167
train_rmse,0.40226
val_mae,0.28488
val_rmse,0.4034


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4pn254xb with config:
[34m[1mwandb[0m: 	activation: PReLU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [256, 256, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0008352423724351961
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.7242414268703111, 'step_size': 12}


Sequential(
  (0): Linear(in_features=15, out_features=256, bias=True)
  (1): PReLU(num_parameters=1)
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): PReLU(num_parameters=1)
  (4): Linear(in_features=256, out_features=32, bias=True)
  (5): PReLU(num_parameters=1)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.223542…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂
train_rmse,█▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂
val_mae,█▅▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁
val_rmse,█▅▄▄▃▃▃▃▃▃▃▃▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁

0,1
epoch,39.0
train_loss,0.0017
train_rmse,0.40577
val_mae,0.28456
val_rmse,0.40129


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 061nngfx with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	batch_size: 65536
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [1024, 512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.0008116518223746423
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.4410262005220056, 'step_size': 20}


Sequential(
  (0): Linear(in_features=15, out_features=1024, bias=True)
  (1): ReLU()
  (2): Linear(in_features=1024, out_features=512, bias=True)
  (3): ReLU()
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): ReLU()
  (6): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 

0,1
epoch,▁▁▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
train_loss,▂▁▂▃▁▁▁▁▁▁▁▁▁▂█▁▁▁▁▁▁▁▂▁▁
train_rmse,▂▂▂▃▁▁▁▁▁▁▁▁▁▂█▁▁▂▁▁▁▁▂▁▁
val_mae,▂▁▁▂▁▁▁▁▁▂▁▁▂▁▁▁▂█▁▁▁▂▁▁▁
val_rmse,▂▂▁▂▁▁▁▁▁▂▁▁▂▁▁▁▂█▁▁▁▂▁▁▁

0,1
epoch,25.0
train_loss,0.00172
train_rmse,0.40735
val_mae,0.28921
val_rmse,0.4068


[34m[1mwandb[0m: Agent Starting Run: 45y9fomy with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [1024, 512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.0006584717358943737
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.58121199793712, 'step_size': 10}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016752872216360022, max=1.0…

Sequential(
  (0): Linear(in_features=15, out_features=1024, bias=True)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=1024, out_features=512, bias=True)
  (3): LeakyReLU(negative_slope=0.01)
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): LeakyReLU(negative_slope=0.01)
  (6): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,███▆▇▄▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂
train_rmse,███▆▇▄▃▄▃▃▃▃▃▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂
val_mae,▅██▇▅▃▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▂▂▂▃▃▂▁▁▁▁▁▁▁▁▁▁
val_rmse,▆██▇▆▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
train_loss,0.0017
train_rmse,0.40532
val_mae,0.28443
val_rmse,0.39889


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ikp25a4g with config:
[34m[1mwandb[0m: 	activation: ELU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.0009283572882367396
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.7360989796055823, 'step_size': 10}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016752370132599025, max=1.0…

Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): ELU(alpha=1.0)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): ELU(alpha=1.0)
  (4): Linear(in_features=256, out_features=128, bias=True)
  (5): ELU(alpha=1.0)
  (6): Linear(in_features=128, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▂▂▃▃▄▄▅▅▆▆▇▇█
lr,█████████▁▁▁▁
train_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▁▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▂▁▁▁▁▁▁▁▂▁▁▁▁▂▁▁▂▁▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,▃█▇▇▇▆▆▃▃▄▁▁▁▁
val_rmse,▅█▇▇▇▅▆▃▃▄▂▁▁▁

0,1
epoch,13.0
lr,0.00068
train_loss,0.0017
train_rmse,0.40545
val_mae,0.28828
val_rmse,0.40763


[34m[1mwandb[0m: Agent Starting Run: 0xr9ir94 with config:
[34m[1mwandb[0m: 	activation: PReLU
[34m[1mwandb[0m: 	batch_size: 8192
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.0007388587546451191
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.7328372985121936, 'step_size': 11}


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016689457633765413, max=1.0…

Sequential(
  (0): Linear(in_features=15, out_features=256, bias=True)
  (1): PReLU(num_parameters=1)
  (2): Linear(in_features=256, out_features=128, bias=True)
  (3): PReLU(num_parameters=1)
  (4): Linear(in_features=128, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,██████████▅▅▅▅▅▅▅▅▅▅▅▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁
train_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
val_mae,█▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▅▄▃▃▃▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,0.00029
train_loss,0.00166
train_rmse,0.40062
val_mae,0.28514
val_rmse,0.40193


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
500 response executing GraphQL.
{"errors":[{"message":"Post \"http://anaconda2.default.svc.cluster.local/search\": read tcp 10.52.2.5:47384-\u003e10.55.247.53:80: read: connection reset by peer","path":["agentHeartbeat"]}],"data":{"agentHeartbeat":null}}
[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: Post "http://anaconda2.default.svc.cluster.local/search": read tcp 10.52.2.5:47384->10.55.247.53:80: read: connection reset by peer (<Response [500]>)
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pwf9mxzt with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256]
[34m[1mwandb[0m: 	learning_rate: 0.00029090590377751826
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'

Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): LeakyReLU(negative_slope=0.01)
  (4): Linear(in_features=256, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▆▆▇▇██
train_loss,█▄▃▂▂▂▂▂▂▁▂▂▂▂▁▁▁▁▂▁▁▂▁▂▁▁▁▁▂▁▁▁▁▂▁▁▁▁▁▁
train_rmse,█▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▂▂▁▁▂▂▂▁▁▁▁▂▁▁▁▁▂▁▁▁▁▂▁
val_mae,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁
val_rmse,█▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁

0,1
epoch,15.0
train_loss,0.0017
train_rmse,0.4059
val_mae,0.2875
val_rmse,0.40543


[34m[1mwandb[0m: Agent Starting Run: ewpcfomy with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	batch_size: 65536
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.000861933483612348
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.3998468115240451, 'step_size': 10}


Sequential(
  (0): Linear(in_features=15, out_features=256, bias=True)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=256, out_features=128, bias=True)
  (3): LeakyReLU(negative_slope=0.01)
  (4): Linear(in_features=128, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁
train_rmse,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁
val_mae,█▅▄▃▂▂▂▂▂▁▁▁▁▁▁▁
val_rmse,█▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,16.0
train_loss,0.0018
train_rmse,0.41754
val_mae,0.29513
val_rmse,0.41389


[34m[1mwandb[0m: Agent Starting Run: x0cvdyhx with config:
[34m[1mwandb[0m: 	activation: Softplus
[34m[1mwandb[0m: 	batch_size: 32768
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [256, 256, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0008061622150643143
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.32225873876759914, 'step_size': 13}


Sequential(
  (0): Linear(in_features=15, out_features=256, bias=True)
  (1): Softplus(beta=1, threshold=20)
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): Softplus(beta=1, threshold=20)
  (4): Linear(in_features=256, out_features=32, bias=True)
  (5): Softplus(beta=1, threshold=20)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.312405…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,████████████▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁
train_loss,█▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▂▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,▃▂█▆▁▂▄▄▃▂▄▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,▄▃█▆▂▂▄▄▃▂▄▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,3e-05
train_loss,0.00164
train_rmse,0.39859
val_mae,0.28396
val_rmse,0.40294


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: m7vygsyn with config:
[34m[1mwandb[0m: 	activation: PReLU
[34m[1mwandb[0m: 	batch_size: 65536
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [128, 64, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0006616254656780672
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.2780186830927862, 'step_size': 16}


Sequential(
  (0): Linear(in_features=15, out_features=128, bias=True)
  (1): PReLU(num_parameters=1)
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): PReLU(num_parameters=1)
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): PReLU(num_parameters=1)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.273592…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████████████▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,5e-05
train_loss,0.00173
train_rmse,0.40902
val_mae,0.29125
val_rmse,0.40967


[34m[1mwandb[0m: Agent Starting Run: 9wgqejke with config:
[34m[1mwandb[0m: 	activation: ELU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [256, 256, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0004073950620156513
[34m[1mwandb[0m: 	lr_scheduler: None
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.53322794092516, 'step_size': 12}


Sequential(
  (0): Linear(in_features=15, out_features=256, bias=True)
  (1): ELU(alpha=1.0)
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): ELU(alpha=1.0)
  (4): Linear(in_features=256, out_features=32, bias=True)
  (5): ELU(alpha=1.0)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
train_loss,0.00169
train_rmse,0.40411
val_mae,0.28477
val_rmse,0.40226


[34m[1mwandb[0m: Agent Starting Run: 2l0pedtp with config:
[34m[1mwandb[0m: 	activation: Softplus
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [256, 256, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0005128254339268629
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.5103039436361085, 'step_size': 14}


Sequential(
  (0): Linear(in_features=15, out_features=256, bias=True)
  (1): Softplus(beta=1, threshold=20)
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): Softplus(beta=1, threshold=20)
  (4): Linear(in_features=256, out_features=32, bias=True)
  (5): Softplus(beta=1, threshold=20)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,█████████████▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,▅▂▃▂▂▅█▃▃▁▁▁▁▁▁▁▁▂▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,▅▂▄▃▂▅█▃▃▁▁▁▁▁▁▁▁▃▃▃▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,▄█▂▃▄▃▃▁▁▁▂▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,▆█▃▃▃▃▂▁▁▁▂▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,0.00013
train_loss,0.00169
train_rmse,0.4044
val_mae,0.28499
val_rmse,0.40396


[34m[1mwandb[0m: Agent Starting Run: 688ll3n4 with config:
[34m[1mwandb[0m: 	activation: ELU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.0005842355097145116
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.5873073101732551, 'step_size': 12}


Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): ELU(alpha=1.0)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): ELU(alpha=1.0)
  (4): Linear(in_features=256, out_features=128, bias=True)
  (5): ELU(alpha=1.0)
  (6): Linear(in_features=128, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,███████████▄▄▄▄▄▄▄▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁
train_loss,█▁▂▃▂▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▁▂▃▃▃▃▂▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▄█▆▅▄▃▂▂▂▂▂▁▂▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▃▆▅▄▄▃▂▂▂▂▂▁▂▃▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,0.00012
train_loss,0.00168
train_rmse,0.40269
val_mae,0.28418
val_rmse,0.40165


[34m[1mwandb[0m: Agent Starting Run: jto5lmxg with config:
[34m[1mwandb[0m: 	activation: ELU
[34m[1mwandb[0m: 	batch_size: 16384
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [512, 256, 128]
[34m[1mwandb[0m: 	learning_rate: 0.0007315384241978428
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.5831069055937542, 'step_size': 14}


Sequential(
  (0): Linear(in_features=15, out_features=512, bias=True)
  (1): ELU(alpha=1.0)
  (2): Linear(in_features=512, out_features=256, bias=True)
  (3): ELU(alpha=1.0)
  (4): Linear(in_features=256, out_features=128, bias=True)
  (5): ELU(alpha=1.0)
  (6): Linear(in_features=128, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
lr,█████████████▄▄▄▄▄▄▄▄▄▄▄▄▄▄▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▂▃▃▄▄▃▃▃▂▂▂▂▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_rmse,█▂▃▄▄▄▃▃▃▂▂▂▂▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_mae,█▄█▇▆▆▅▅▃▂▂▄▃▃▁▂▂▂▂▂▂▂▂▂▂▂▂▃▂▂▁▁▁▁▁▁▁▁▁▁
val_rmse,█▄▆▆▅▅▄▄▃▂▂▄▃▃▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,39.0
lr,0.00025
train_loss,0.00168
train_rmse,0.40271
val_mae,0.28434
val_rmse,0.40178


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pahhvh3k with config:
[34m[1mwandb[0m: 	activation: ELU
[34m[1mwandb[0m: 	batch_size: 8192
[34m[1mwandb[0m: 	dataset: NGCT
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	estimate_quantile: 0.9935
[34m[1mwandb[0m: 	layers: [128, 64, 32]
[34m[1mwandb[0m: 	learning_rate: 0.0004775143714830786
[34m[1mwandb[0m: 	lr_scheduler: StepLR
[34m[1mwandb[0m: 	lr_scheduler_kwargs: {'gamma': 0.4866615041451468, 'step_size': 16}


Sequential(
  (0): Linear(in_features=15, out_features=128, bias=True)
  (1): ELU(alpha=1.0)
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ELU(alpha=1.0)
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): ELU(alpha=1.0)
  (6): Linear(in_features=32, out_features=1, bias=True)
)


  0%|          | 0/40 [00:00<?, ?it/s]

0 1 

In [None]:
# `def test` below rebinds the module-level name `test`, which the earlier
# data-loading cell assigned to the held-out WindDataset split. Keep a separate
# handle on the dataset first. The isinstance guard makes this cell safe to
# re-run once `test` already refers to the function.
if isinstance(test, WindDataset):
    test_dataset = test


def test(model, dl):
    """Evaluate ``model`` on every batch of ``dl`` and return ``(rmse, mae)``.

    Errors are accumulated as sums over all samples and divided by the total
    sample count, so a smaller final batch is weighted correctly (averaging
    per-batch means would over-weight its samples).

    Args:
        model: Callable module; ``model(inputs)`` is squeezed and compared
            against ``targets``. It is switched to eval mode and left there.
        dl: Iterable yielding ``(inputs, targets)`` pairs, e.g. a DataLoader.

    Returns:
        Tuple ``(rmse, mae)``, each multiplied by ``stds[VARIABLE]``
        (presumably the normalisation std of the target, which converts the
        metrics back to physical units -- confirm against WindModel).

    Raises:
        ValueError: If ``dl`` yields no samples.
    """
    model.eval()

    squared_error = 0.0
    absolute_error = 0.0
    n_samples = 0

    with torch.no_grad():
        for inputs, targets in tqdm(dl):
            prediction = model(inputs).squeeze()

            # reduction="sum" keeps per-sample weighting independent of batch size.
            squared_error += torch.nn.functional.mse_loss(prediction, targets, reduction="sum")
            absolute_error += torch.nn.functional.l1_loss(prediction, targets, reduction="sum")
            n_samples += targets.numel()

    if n_samples == 0:
        raise ValueError("test() received no samples to evaluate")

    rmse = (squared_error / n_samples) ** 0.5 * stds[VARIABLE]
    mae = (absolute_error / n_samples) * stds[VARIABLE]
    return rmse, mae


# NOTE(review): `model` is not defined in any cell visible here; it must exist
# at module level (e.g. the best sweep model) before this cell runs -- confirm.
test_dl = DataLoader(test_dataset, batch_size=2048, shuffle=False)
test_rmse, test_mae = test(model, test_dl)

print(f"RMSE: {test_rmse} m/s")
print(f"MAE:  {test_mae} m/s")
