In [3]:
from torchdiffeq import odeint_adjoint as odeint
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [42]:
import pickle
import pprint
import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
import torchviz
import einops
torch.cuda.is_available()

True

In [5]:
import math
import numpy as np
from IPython.display import clear_output
from tqdm import tqdm_notebook as tqdm

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.color_palette("bright")
import matplotlib as mpl
import matplotlib.cm as cm

import torch
from torch import Tensor
from torch import nn
from torch.nn import functional as F 
from torch.autograd import Variable

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split


use_cuda = torch.cuda.is_available()
device = 'cuda' if torch.cuda.is_available() else 'cpu' 

In [6]:
sweep_config = {
    'method': 'random'
    }
    
metric = {
    'name': 'loss',
    'goal': 'minimize'   
    }

sweep_config['metric'] = metric    

In [55]:
parameters_dict = {
    # dataset params
    'dataset_type': {'value': 'hyst_saved'}, 
    # fixed params
    'optimizer': {'value': 'adam'},
    # architecture params
    'arch_in_dim' : {'value': 2},
    'arch_hid_dim' : {'values': [2, 4, 8, 16]},
    # learning params
    'epochs' : {'values': [1000, 3000, 10000]},
    'batch_size' : {'values': [16, 32, 64, 128]},
    'learning_rate' : {'value': 0.01},
    # neural ODE specific params
    'solver_type' : {'values' : ['euler', 'rk4', 'dopri5', 'bosh3']},
    'time_idependent_num_ts' : {'values' : [2, 5, 10, 50]},
    'return_whole_sequence' : {'value' : False},

    # 'fc_layer_size': {
    #     'values': [128, 256, 512]
    #     },
    # 'dropout': {
    #       'values': [0.3, 0.4, 0.5]
    #     },
    }

sweep_config['parameters'] = parameters_dict

pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'arch_hid_dim': {'values': [2, 4, 8, 16]},
                'arch_in_dim': {'value': 2},
                'batch_size': {'values': [16, 32, 64, 128]},
                'dataset_type': {'value': 'hyst_saved'},
                'epochs': {'values': [1000, 3000, 10000]},
                'learning_rate': {'value': 0.01},
                'optimizer': {'value': 'adam'},
                'return_whole_sequence': {'value': False},
                'solver_type': {'values': ['euler', 'rk4', 'dopri5', 'bosh3']},
                'time_idependent_num_ts': {'values': [2, 5, 10, 50]}}}


Adaptive-step:
- dopri8 Runge-Kutta 7(8) of Dormand-Prince-Shampine
- dopri5 Runge-Kutta 4(5) of Dormand-Prince [default].
- bosh3 Runge-Kutta 2(3) of Bogacki-Shampine
- adaptive_heun Runge-Kutta 1(2)

Fixed-step:
- euler Euler method.
- midpoint Midpoint method.
- rk4 Fourth-order Runge-Kutta with 3/8 rule.
- explicit_adams Explicit Adams.
- implicit_adams Implicit Adams.

In [81]:
# config.dataset_type
from data_utils import load_existing_hyst_dataset, create_hyst_dataset
from models import ODEFunc
from tqdm.notebook import tqdm



criterion = torch.nn.functional.mse_loss

def train(project='test-neuralODE-sweep', config=None):
    # Initialize a new wandb run
    with wandb.init(config=config):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        
        trainloader, testloader, full_X, full_y, no_noise_y = build_dataset(config.dataset_type, config.batch_size)
        network = build_network(config.arch_in_dim, config.arch_hid_dim)
        optimizer = build_optimizer(network, config.optimizer, config.learning_rate)
        print(optimizer)
        # pred_y = odeint(func, batch_y0, batch_t, method=config.solver_type, adjoint_method=config.solver_type).to(device)
        train_epochs(model=network,
                      train_loader=trainloader,
                       test_loader=testloader,
                         criterion=criterion,
                           optimizer=optimizer,  
                           full_X=full_X,
                             full_y=full_y, 
                             config=config)
        # model, train_loader, test_loader, criterion, optimizer, full_X, full_y, config
        # for epoch in range(config.epochs):
        #     avg_loss = train_epoch(network, trainloader, testloader, optimizer, solver_type=config.solver_type)
        #     wandb.log({"loss": avg_loss, "epoch": epoch})

def build_dataset(which, batch_size):
    """
    which: 'hyst', 'hyst_saved', 'experimental', 'experimental_saved'
    """
    if which=='hyst':
        train_dataset, test_dataset, X_full, y_full, no_noise_y  = create_hyst_dataset()
    elif which=='hyst_saved':
        train_dataset, test_dataset, X_full, y_full, no_noise_y = load_existing_hyst_dataset() 

    trainloader = torch.utils.data.DataLoader(
                  train_dataset, 
                  batch_size=batch_size)
    testloader = torch.utils.data.DataLoader(
                    test_dataset,
                    batch_size=batch_size)

    return trainloader, testloader, X_full, y_full, no_noise_y


def build_network(in_dim, hid_dim):
    network = ODEFunc(in_dim=in_dim, hid_dim=hid_dim)

    return network.to(device)
        

def build_optimizer(network, optimizer, learning_rate):
    if optimizer == "sgd":
        optimizer = torch.optim.SGD(network.parameters(),
                              lr=learning_rate, momentum=0.9)
    elif optimizer == "adam":
        optimizer = torch.optim.Adam(network.parameters(),
                               lr=learning_rate)
    return optimizer


def train_epoch(network, train_loader, test_loader, optimizer, config):
    cumu_loss = 0
    for _, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        # ➡ Forward pass
        loss = F.nll_loss(network(data), target)
        cumu_loss += loss.item()

        # ⬅ Backward pass + weight update
        loss.backward()
        optimizer.step()

        wandb.log({"batch MSE": loss.item()})

    return cumu_loss / len(train_loader) 

def train_epochs(model, train_loader, test_loader, criterion, optimizer, full_X, full_y, config):
        # Tell wandb to watch what the model gets up to: gradients, weights, and more!
    """
    includes:
    learning for n epochs
    logging train loss every epoch
    testing on val_set every epoch
    logging test loss every epoch

    saving weights for best epoch
    logging visualisation to wandb for best epoch

    """

    wandb.watch(model, criterion, log="all", log_freq=10)

    # time to input to odeint
    t = torch.linspace(0, 1, config.time_idependent_num_ts).to(device)
    # Run training and track with wandb
    total_batches = len(train_loader) * config.epochs
    example_ct = 0  # number of examples seen
    batch_ct = 0
    min_val_loss = 1000
    for epoch in tqdm(range(config.epochs)):
        for _, (x, y) in enumerate(train_loader):

            loss = train_batch_ode(x, y, t, model, optimizer, criterion, config)
            example_ct +=  len(x)
            batch_ct += 1

            # Report metrics every 25th batch
            if ((batch_ct + 1) % 100) == 0:
                print(f'logging: {epoch=}, {loss=}, {example_ct=}')
                wandb.log({"epoch": epoch, "train_mse": loss}, step=example_ct)
        
        if (epoch + 1) % 100 == 0:
            with torch.no_grad():
                val_loss = test_log_vis(model,
                                         t,
                                           test_loader,
                                            train_loader,
                                             epoch,
                                               example_ct,
                                                min_val_loss,
                                                full_X, full_y,
                                                  config)
                if val_loss < min_val_loss:
                    min_val_loss = val_loss         

def train_batch_ode(x, y, t, model, optimizer, criterion, config):
    # print('HUI')  
    # print(optimizer)  
    optimizer.zero_grad()
    x, y = x.to(device), y.to(device)
        
    # Forward pass ➡
    pred_y = odeint(model, y0=x, t=t, method=config.solver_type, adjoint_method=config.solver_type).to(device)
    if not config.return_whole_sequence:
        pred_y=pred_y[-1]
        
    loss = criterion(pred_y, y)
    
    # Backward pass ⬅
    loss.backward()

    # Step with optimizer
    optimizer.step()
    return loss

def test_log_vis(model, t, test_loader, train_loader, epoch, example_ct, min_val_loss, full_X, full_y, config):
    with torch.no_grad():
        val_loss = 0 

        X_test = []
        y_test = []
        t.to(device)
        for _, (x, y) in enumerate(test_loader): 
            x, y = x.to(device), y.to(device)

            pred_y = odeint(model, t=t, y0=x, method=config.solver_type, adjoint_method=config.solver_type).to(device)
            if not config.return_whole_sequence:
                pred_y=pred_y[-1]
            loss = criterion(pred_y, y)
            val_loss+=loss

            X_test.append(x)
            y_test.append(y)

        val_loss /= len(test_loader)
        val_loss = val_loss.item()    

        wandb.log({"epoch": epoch, "test_mse": val_loss}, step=example_ct) 
        # visualising:
        if val_loss < min_val_loss: 
            """ X_train y_train X_test y_test X_full, y_full"""
            X_train = []
            y_train = []
            for x, y in train_loader:
                X_train.append(x)
                y_train.append(y)
            X_train = torch.cat(X_train, dim=0)
            X_test = torch.cat(X_test, dim=0)
            y_train = torch.cat(y_train, dim=0)
            y_test = torch.cat(y_test, dim=0)
            full_X = torch.Tensor(full_X).to(device)



            z_full = odeint(model, t=t.to(device), y0=full_X.to(device), method=config.solver_type, adjoint_method=config.solver_type).to(device)
            if not config.return_whole_sequence:
                z_full=z_full[-1].cpu()
            full_X=full_X.cpu()
            X_train=X_train.cpu()
            X_test=X_test.cpu()
            y_train = y_train.cpu()
            y_test = y_test.cpu()


            plt.figure(figsize=(10, 7), dpi=100)
            sc_1 = plt.scatter(X_train[:, 0], y_train[:, 0], color = 'black', s=5, label = 'Тренировочные данные')
            plt.scatter(X_train[:, 0], y_train[:, 1], color = 'black', s=5)
            sc_2 = plt.scatter(X_test[:, 0], y_test[:, 0], color = 'red', s=5, label = 'Тестовые данные')
            plt.scatter(X_test[:, 0], y_test[:, 1], color = 'red', s=5)
            plt.plot(full_X[:, 0], z_full[:, 0], color = 'green', label = 'Аппроксимация')
            plt.plot(full_X[:, 0], z_full[:, 1], color = 'green')
            
            t = np.linspace(-3*np.pi/4, np.pi/4, 100, endpoint = True)
            plt.plot(t, np.sin(t), color = 'pink', label = 'Незашумленные данные')
            plt.plot(t, np.cos(t), color = 'pink')
            plt.legend()
            plt.xlabel('x')
            plt.ylabel('y')
            plt.title('Результат ') 
        
            wandb.log({"Result": wandb.Image(plt), 'epoch': epoch}, step=example_ct)
        return val_loss               

In [69]:
device

'cuda'

In [23]:
parameters_dict

{'dataset_type': {'value': 'hyst_saved'},
 'optimizer': {'value': 'adam'},
 'arch_in_dim': {'value': 2},
 'arch_hid_dim': {'values': [2, 4, 8, 16]},
 'epochs': {'values': [1000, 3000, 10000]},
 'batch_size': {'values': [16, 32, 64, 128]},
 'learning_rate': {'value': 0.01},
 'solver_type': {'values': ['euler', 'rk4', 'dopri5', 'bosh3']},
 'time_idependent_num_ts': {'values': [2, 5, 10, 50]}}

In [25]:
        trainloader, testloader, full_X, full_y, no_noise_y = build_dataset('hyst_saved', batch_size = 32)
        network = build_network(2, 4)
        optimizer = build_optimizer(network, 'adam', 0.01)

In [49]:
full_X.shape

torch.Size([100, 2])

In [43]:
t = torch.linspace(0,1, 5).to(device)
x, y = next(iter(trainloader))
x, y =x.to(device), y.to(device)

In [38]:
t

tensor([0.], device='cuda:0')

In [44]:
    
        
    # Forward pass ➡
    pred_y = odeint(network, x, t=t)


In [63]:
pred_y = odeint(network, y0=full_X.to(device), t=t)

In [52]:
pred_y[-1].shape

torch.Size([100, 2])

In [48]:
x.shape

torch.Size([32, 2])

In [46]:
pred_y[-1].shape

torch.Size([32, 2])

In [31]:
load_existing_hyst_dataset()

(<torch.utils.data.dataset.TensorDataset at 0x1eba9210eb0>,
 <torch.utils.data.dataset.TensorDataset at 0x1eba9210730>,
 tensor([[-2.3562,  1.0000],
         [-2.3245,  1.0000],
         [-2.2927,  1.0000],
         [-2.2610,  1.0000],
         [-2.2293,  1.0000],
         [-2.1975,  1.0000],
         [-2.1658,  1.0000],
         [-2.1341,  1.0000],
         [-2.1023,  1.0000],
         [-2.0706,  1.0000],
         [-2.0389,  1.0000],
         [-2.0071,  1.0000],
         [-1.9754,  1.0000],
         [-1.9437,  1.0000],
         [-1.9119,  1.0000],
         [-1.8802,  1.0000],
         [-1.8485,  1.0000],
         [-1.8167,  1.0000],
         [-1.7850,  1.0000],
         [-1.7533,  1.0000],
         [-1.7215,  1.0000],
         [-1.6898,  1.0000],
         [-1.6581,  1.0000],
         [-1.6263,  1.0000],
         [-1.5946,  1.0000],
         [-1.5629,  1.0000],
         [-1.5311,  1.0000],
         [-1.4994,  1.0000],
         [-1.4677,  1.0000],
         [-1.4359,  1.0000],
         [

In [82]:
sweep_id = wandb.sweep(sweep_config, project="pytorch-sweeps-demo")

Create sweep with ID: ii8ecgh5
Sweep URL: https://wandb.ai/peter_chizhov/pytorch-sweeps-demo/sweeps/ii8ecgh5


In [84]:
wandb.agent(sweep_id, train, count=30)

[34m[1mwandb[0m: Agent Starting Run: tmjpmzlm with config:
[34m[1mwandb[0m: 	arch_hid_dim: 4
[34m[1mwandb[0m: 	arch_in_dim: 2
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dataset_type: hyst_saved
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	return_whole_sequence: False
[34m[1mwandb[0m: 	solver_type: euler
[34m[1mwandb[0m: 	time_idependent_num_ts: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)


  0%|          | 0/1000 [00:00<?, ?it/s]

logging: epoch=49, loss=tensor(0.0779, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=3739
logging: epoch=99, loss=tensor(0.0565, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=7489
logging: epoch=149, loss=tensor(0.0169, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=11239
logging: epoch=199, loss=tensor(0.0084, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=14989
logging: epoch=249, loss=tensor(0.0101, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=18739
logging: epoch=299, loss=tensor(0.0094, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=22489
logging: epoch=349, loss=tensor(0.0088, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=26239
logging: epoch=399, loss=tensor(0.0078, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=29989
logging: epoch=449, loss=tensor(0.0078, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=33739
logging: epoch=499, loss=tensor(0.0079, device='cuda:0', grad_fn=<MseLossBack

VBox(children=(Label(value='0.188 MB of 0.204 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.922105…

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇███
test_mse,▁▁▁▁▁▁▁▄▅█
train_mse,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▄▅▇█

0,1
epoch,999.0
test_mse,1456.41772
train_mse,1405.44995


[34m[1mwandb[0m: Agent Starting Run: kbz551h6 with config:
[34m[1mwandb[0m: 	arch_hid_dim: 4
[34m[1mwandb[0m: 	arch_in_dim: 2
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dataset_type: hyst_saved
[34m[1mwandb[0m: 	epochs: 10000
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	return_whole_sequence: False
[34m[1mwandb[0m: 	solver_type: bosh3
[34m[1mwandb[0m: 	time_idependent_num_ts: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)


  0%|          | 0/10000 [00:00<?, ?it/s]

logging: epoch=98, loss=tensor(0.0542, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=7425
logging: epoch=198, loss=tensor(0.0160, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=14925
logging: epoch=298, loss=tensor(0.0072, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=22425
logging: epoch=398, loss=tensor(0.0067, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=29925
logging: epoch=498, loss=tensor(0.0063, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=37425
logging: epoch=598, loss=tensor(0.0060, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=44925
logging: epoch=698, loss=tensor(0.0057, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=52425
logging: epoch=798, loss=tensor(0.0055, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=59925
logging: epoch=898, loss=tensor(0.0053, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=67425
logging: epoch=998, loss=tensor(0.0052, device='cuda:0', grad_fn=<MseLossBa

  plt.figure(figsize=(10, 7), dpi=100)


logging: epoch=1898, loss=tensor(0.0048, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=142425
logging: epoch=1998, loss=tensor(0.0048, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=149925
logging: epoch=2098, loss=tensor(0.0048, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=157425
logging: epoch=2198, loss=tensor(0.0047, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=164925
logging: epoch=2298, loss=tensor(0.0045, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=172425
logging: epoch=2398, loss=tensor(0.0039, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=179925
logging: epoch=2498, loss=tensor(0.0030, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=187425
logging: epoch=2598, loss=tensor(0.0025, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=194925
logging: epoch=2698, loss=tensor(0.0021, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=202425
logging: epoch=2798, loss=tensor(0.0019, device='cuda:0

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_mse,█▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_mse,█▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,9999.0
test_mse,0.00167
train_mse,0.00133


[34m[1mwandb[0m: Agent Starting Run: xy7t4qjr with config:
[34m[1mwandb[0m: 	arch_hid_dim: 16
[34m[1mwandb[0m: 	arch_in_dim: 2
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dataset_type: hyst_saved
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	return_whole_sequence: False
[34m[1mwandb[0m: 	solver_type: euler
[34m[1mwandb[0m: 	time_idependent_num_ts: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)


  0%|          | 0/1000 [00:00<?, ?it/s]

logging: epoch=98, loss=tensor(0.0552, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=7425
logging: epoch=198, loss=tensor(0.0094, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=14925
logging: epoch=298, loss=tensor(0.0063, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=22425
logging: epoch=398, loss=tensor(0.0059, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=29925
logging: epoch=498, loss=tensor(0.0056, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=37425
logging: epoch=598, loss=tensor(0.0053, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=44925
logging: epoch=698, loss=tensor(0.0051, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=52425
logging: epoch=798, loss=tensor(0.0049, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=59925
logging: epoch=898, loss=tensor(0.0045, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=67425
logging: epoch=998, loss=tensor(0.0022, device='cuda:0', grad_fn=<MseLossBa

VBox(children=(Label(value='0.637 MB of 0.637 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▂▂▃▃▃▃▄▄▅▅▆▆▆▆▇▇██
test_mse,█▂▂▂▁▁▁▁▁▁
train_mse,█▂▂▁▁▁▁▁▁▁

0,1
epoch,999.0
test_mse,0.00338
train_mse,0.00215


[34m[1mwandb[0m: Agent Starting Run: ffvvrn78 with config:
[34m[1mwandb[0m: 	arch_hid_dim: 4
[34m[1mwandb[0m: 	arch_in_dim: 2
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dataset_type: hyst_saved
[34m[1mwandb[0m: 	epochs: 10000
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	return_whole_sequence: False
[34m[1mwandb[0m: 	solver_type: euler
[34m[1mwandb[0m: 	time_idependent_num_ts: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)


  0%|          | 0/10000 [00:00<?, ?it/s]

logging: epoch=49, loss=tensor(0.0681, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=3739
logging: epoch=99, loss=tensor(0.0255, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=7489
logging: epoch=149, loss=tensor(0.0110, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=11239
logging: epoch=199, loss=tensor(0.0120, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=14989
logging: epoch=249, loss=tensor(0.0111, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=18739
logging: epoch=299, loss=tensor(0.0094, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=22489
logging: epoch=349, loss=tensor(0.0082, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=26239
logging: epoch=399, loss=tensor(0.0081, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=29989
logging: epoch=449, loss=tensor(0.0081, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=33739
logging: epoch=499, loss=tensor(0.0082, device='cuda:0', grad_fn=<MseLossBack

VBox(children=(Label(value='0.254 MB of 0.254 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_mse,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇██
train_mse,▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇██

0,1
epoch,9999.0
test_mse,169775.875
train_mse,175068.21875


[34m[1mwandb[0m: Agent Starting Run: 0c3zdf5a with config:
[34m[1mwandb[0m: 	arch_hid_dim: 16
[34m[1mwandb[0m: 	arch_in_dim: 2
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dataset_type: hyst_saved
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	return_whole_sequence: False
[34m[1mwandb[0m: 	solver_type: rk4
[34m[1mwandb[0m: 	time_idependent_num_ts: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)


  0%|          | 0/1000 [00:00<?, ?it/s]

logging: epoch=19, loss=tensor(0.0445, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=1489
logging: epoch=39, loss=tensor(0.0122, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=2989
logging: epoch=59, loss=tensor(0.0067, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=4489
logging: epoch=79, loss=tensor(0.0057, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=5989
logging: epoch=99, loss=tensor(0.0042, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=7489
logging: epoch=119, loss=tensor(0.0029, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=8989
logging: epoch=139, loss=tensor(0.0024, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=10489
logging: epoch=159, loss=tensor(0.0022, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=11989
logging: epoch=179, loss=tensor(0.0021, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=13489
logging: epoch=199, loss=tensor(0.0021, device='cuda:0', grad_fn=<MseLossBackward0>)

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_mse,█▂▂▂▁▁▁▁▁▁
train_mse,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,999.0
test_mse,0.00214
train_mse,0.00191


[34m[1mwandb[0m: Agent Starting Run: ftocmemv with config:
[34m[1mwandb[0m: 	arch_hid_dim: 8
[34m[1mwandb[0m: 	arch_in_dim: 2
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dataset_type: hyst_saved
[34m[1mwandb[0m: 	epochs: 3000
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	return_whole_sequence: False
[34m[1mwandb[0m: 	solver_type: euler
[34m[1mwandb[0m: 	time_idependent_num_ts: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)


  0%|          | 0/3000 [00:00<?, ?it/s]

logging: epoch=98, loss=tensor(0.0564, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=7425
logging: epoch=198, loss=tensor(0.0210, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=14925
logging: epoch=298, loss=tensor(0.0056, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=22425
logging: epoch=398, loss=tensor(0.0049, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=29925
logging: epoch=498, loss=tensor(0.0044, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=37425
logging: epoch=598, loss=tensor(0.0040, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=44925
logging: epoch=698, loss=tensor(0.0037, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=52425
logging: epoch=798, loss=tensor(0.0033, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=59925
logging: epoch=898, loss=tensor(0.0028, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=67425
logging: epoch=998, loss=tensor(0.0022, device='cuda:0', grad_fn=<MseLossBa

VBox(children=(Label(value='1.010 MB of 1.013 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.996849…

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
test_mse,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_mse,█▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,2999.0
test_mse,0.00183
train_mse,0.00139


[34m[1mwandb[0m: Agent Starting Run: ijuljgum with config:
[34m[1mwandb[0m: 	arch_hid_dim: 4
[34m[1mwandb[0m: 	arch_in_dim: 2
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dataset_type: hyst_saved
[34m[1mwandb[0m: 	epochs: 1000
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	return_whole_sequence: False
[34m[1mwandb[0m: 	solver_type: dopri5
[34m[1mwandb[0m: 	time_idependent_num_ts: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)


  0%|          | 0/1000 [00:00<?, ?it/s]

logging: epoch=19, loss=tensor(0.0495, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=1489
logging: epoch=39, loss=tensor(0.0239, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=2989
logging: epoch=59, loss=tensor(0.0059, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=4489
logging: epoch=79, loss=tensor(0.0061, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=5989
logging: epoch=99, loss=tensor(0.0061, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=7489
logging: epoch=119, loss=tensor(0.0061, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=8989
logging: epoch=139, loss=tensor(0.0060, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=10489
logging: epoch=159, loss=tensor(0.0059, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=11989
logging: epoch=179, loss=tensor(0.0058, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=13489
logging: epoch=199, loss=tensor(0.0057, device='cuda:0', grad_fn=<MseLossBackward0>)

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test_mse,█▇▆▄▂▂▁▁▁▁
train_mse,█▄▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,999.0
test_mse,0.0024
train_mse,0.00207


[34m[1mwandb[0m: Agent Starting Run: 7220phsk with config:
[34m[1mwandb[0m: 	arch_hid_dim: 2
[34m[1mwandb[0m: 	arch_in_dim: 2
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dataset_type: hyst_saved
[34m[1mwandb[0m: 	epochs: 3000
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	return_whole_sequence: False
[34m[1mwandb[0m: 	solver_type: bosh3
[34m[1mwandb[0m: 	time_idependent_num_ts: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.01
    maximize: False
    weight_decay: 0
)


  0%|          | 0/3000 [00:00<?, ?it/s]

logging: epoch=32, loss=tensor(0.0378, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=2475
logging: epoch=66, loss=tensor(0.0452, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=4982
logging: epoch=99, loss=tensor(0.0062, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=7489
logging: epoch=132, loss=tensor(0.0042, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=9975
logging: epoch=166, loss=tensor(0.0082, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=12482
logging: epoch=199, loss=tensor(0.0055, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=14989
logging: epoch=232, loss=tensor(0.0043, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=17475
logging: epoch=266, loss=tensor(0.0078, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=19982
logging: epoch=299, loss=tensor(0.0052, device='cuda:0', grad_fn=<MseLossBackward0>), example_ct=22489
logging: epoch=332, loss=tensor(0.0041, device='cuda:0', grad_fn=<MseLossBackwar