In [2]:
#Ｉmport necessary packages
import torch
import torch.nn as nn
import numpy as np
from matplotlib import pyplot as plt
import xitorch
from xitorch.optimize import rootfinder
import optuna
from torch.utils.data import TensorDataset, DataLoader
import joblib 

# Memory management on GPU
import gc

# Import time
import time

# Testify whether GPU is available
print("Cuda is available: ", torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = "cpu"
print("Device is: ", device)

def memory_stats():
    print("Memory allocated: ", torch.cuda.memory_allocated()/1024**2)
    print("Memory cached: ", torch.cuda.memory_reserved()/1024**2)
memory_stats()


Cuda is available:  True
Device is:  cuda:0
Memory allocated:  0.0
Memory cached:  0.0


In [3]:
# Define MLP for potentials
class PP(nn.Module):
    # Constructor
    def __init__(self, NNs, input_dim = 1, output_dim = 1):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, NNs[0]), 
            nn.ELU(),
        )
        
        for i in range(len(NNs) - 1):
            self.fc.append(nn.Linear(NNs[i], NNs[i + 1]))
            self.fc.append(nn.ELU())
        
        self.fc.append(nn.Linear(NNs[-1], output_dim))
    
    # Forward function
    def forward(self, x):
        return self.fc(x)

# Data generation

In [4]:
from DataGeneration import generateSamples, genVVtt
import os

generating_flag = False
kwgs = {
    "beta" : [0.011, 0.016, 1. / 1.e1, 0.58], 
    "totalNofSeqs" : 1024 * 16, 
    "NofIntervalsRange" : [5, 11], 
    "VVRange" : [-10, 3], 
    "VVLenRange" : [8, 9], 
    "theta0" : 1., 
    "prefix" : "Trial1003", 
    "NofVVSteps" : 400, 
}

# Generate / load data
dataFile = "./data/" + kwgs["prefix"] + ".pt"

if generating_flag or not(os.path.isfile(dataFile)):
    print("Generating data")
    generateSamples(kwgs)

shit = torch.load(dataFile)
Vs = shit["Vs"]
thetas = shit["thetas"]
fs = shit["fs"]

# Stack data as
Vs = torch.stack(Vs)[:1000, :500]
thetas = torch.stack(thetas)[:1000, :500]
fs = torch.stack(fs)[:1000, :500]
ts = shit["ts"][:1000, :500]

In [5]:
# Now Vs and ts have fixed length
print("Vs.shape: ", Vs.shape)
print("thetas.shape: ", thetas.shape)
print("fs.shape: ", fs.shape)
print("ts.shape: ", ts.shape)

Vs.shape:  torch.Size([1000, 500])
thetas.shape:  torch.Size([1000, 500])
fs.shape:  torch.Size([1000, 500])
ts.shape:  torch.Size([1000, 500])


In [6]:
# Calculate Xs
Xs = torch.zeros(Vs.shape)
Xs[:, 1:] = torch.cumulative_trapezoid(Xs, ts)
print("Xs.shape: ", Xs.shape)

Xs.shape:  torch.Size([1000, 500])


# Defining NNs, for $W (V, \xi)$ and $D (V, \xi, \dot{\xi})$

# Calculate $f = \partial W / \partial V$, $\xi_{n+1}$ such that $\partial D / \partial \dot{\xi} + \partial W / \partial \xi = 0$

In [7]:
# Define class for training and calculating f
# Optimizer Adams
import torch.optim as optim

class PotentialsFric:
    # Initialization of W and D
    def __init__(self, kwgsPot):
        self.dim_xi = kwgsPot["dim_xi"]
        self.NNs_W = kwgsPot["NNs_W"]
        self.NNs_D = kwgsPot["NNs_D"]
        self.W = PP(self.NNs_W, input_dim = 1 + self.dim_xi, output_dim = 1)
        # self.D = PP(self.NNs_D, input_dim = 1 + 2 * self.dim_xi, output_dim = 1)
        self.optim_W = optim.Adam(self.W.parameters(), lr=kwgsPot["learning_rate"])
        # self.optim_D = optim.Adam(self.D.parameters(), lr=0.001)
        
        # Device
        self.device = kwgsPot["device"]
        self.W.to(self.device)
        
    # Calculate f 
    def calf(self, x, t):
        # Initialize Vs
        batch_size = x.shape[0]
        time_steps = x.shape[1]
        # xis[:, :, :] = 1. 
        
        
        # Loop through time steps
        
        if self.dim_xi > 0:
            xi0 = torch.zeros([batch_size, self.dim_xi], requires_grad=True, device=self.device)
            
            # List of fs
            list_fs = []
            list_xis = [xi0]
            
            for idx in range(x.shape[1]):
                # f = \partial W / \partial V
                X_W = torch.concat([x[:, idx:idx + 1], list_xis[-1]], dim = 1).requires_grad_()
                # X_W.to(self.device)
                W = torch.sum(self.W(X_W))

                this_piece = torch.autograd.grad(outputs=W, inputs=X_W, create_graph=True)[0]
                list_fs.append(this_piece[:, 0:1])

                # Solve for \dot{\xi} + \partial W / \partial \xi = 0
                dWdXi = this_piece[:, 1:]

                # XiDot = -dWdXi
                if idx < x.shape[1] - 1:
                    xiNext = list_xis[-1] - dWdXi * (t[:, idx + 1:idx + 2] - t[:, idx:idx + 1]) 
                    list_xis.append(xiNext)
                    
                self.fs = torch.concat(list_fs, dim=1)
        else:
            X_W = x.clone().reshape([x.shape[0], x.shape[1], 1]).requires_grad_()
            # print(X_W)
            W = torch.sum(self.W(X_W))
            self.fs = torch.autograd.grad(outputs=W, inputs=X_W, create_graph=True)[0].reshape([x.shape[0], x.shape[1]])
            # print('self.fs.shape: ', self.fs.shape)
            
            

In [8]:
# Different Potentials with D correction
class PotentialsFricCorrection:
    # Initialization of W and D
    def __init__(self, kwgsPot):
        self.dim_xi = kwgsPot["dim_xi"]
        self.NNs_W = kwgsPot["NNs_W"]
        self.NNs_D = kwgsPot["NNs_D"]
        self.W = PP(self.NNs_W, input_dim = 1 + self.dim_xi, output_dim = 1)
        self.D = PP(self.NNs_D, input_dim = self.dim_xi, output_dim = self.dim_xi)
        self.optim_W = optim.Adam(self.W.parameters(), lr=kwgsPot["learning_rate"])
        self.optim_D = optim.Adam(self.D.parameters(), lr=kwgsPot["learning_rate_D"])
        
        # Device
        self.device = kwgsPot["device"]
        self.W.to(self.device)
        self.D.to(self.device)
        
    # Calculate f 
    def calf(self, x, t):
        # Initialize Vs
        batch_size = x.shape[0]
        time_steps = x.shape[1]
        # xis[:, :, :] = 1. 
        
        
        # Loop through time steps
        
        if self.dim_xi > 0:
            xi0 = torch.zeros([batch_size, self.dim_xi], requires_grad=True, device=self.device)
            
            # List of fs
            list_fs = []
            list_xis = [xi0]
            
            for idx in range(x.shape[1]):
                # f = \partial W / \partial V
                X_W = torch.concat([x[:, idx:idx + 1], list_xis[-1]], dim = 1).requires_grad_()
                # X_W.to(self.device)
                W = torch.sum(self.W(X_W))

                this_piece = torch.autograd.grad(outputs=W, inputs=X_W, create_graph=True)[0]
                list_fs.append(this_piece[:, 0:1])

                # Solve for \dot{\xi} + \partial W / \partial \xi = 0
                dWdXi = this_piece[:, 1:]

                # XiDot = -dWdXi
                if idx < x.shape[1] - 1:
                    xiNext = list_xis[-1] + self.D(-dWdXi) * (t[:, idx + 1:idx + 2] - t[:, idx:idx + 1])
                    list_xis.append(xiNext)
                    
                self.fs = torch.concat(list_fs, dim=1)
        else:
            X_W = x.clone().reshape([x.shape[0], x.shape[1], 1]).requires_grad_()
            # print(X_W)
            W = torch.sum(self.W(X_W))
            self.fs = torch.autograd.grad(outputs=W, inputs=X_W, create_graph=True)[0].reshape([x.shape[0], x.shape[1]])

# Define Loss function, training function, dataloaders

In [19]:
# Define loss functions given fs_targ, fs. 
def Loss(fs_targ, fs, ts, p = 2):
    err = torch.trapz(torch.abs(fs_targ - fs) ** p, ts, dim = 1) / torch.trapz(torch.abs(fs_targ) ** p, ts, dim = 1)
    err = torch.pow(err, 1. / p)
    return torch.sum(err)

# Training for one epoch
def train1Epoch(data_loader, loss_fn, myPot, p, update_weights=True):
    # Record of losses for each batch
    Losses = []
    device=myPot.device
    
    # Enumerate over data_loader
    for idx, (Xs, ts, fs_targ) in enumerate(data_loader):
        # Send shits to GPU
        Xs = Xs.to(device)
        ts = ts.to(device)
        fs_targ = fs_targ.to(device)
        
        # Refresh the optimizers
        myPot.optim_W.zero_grad()
        
        if hasattr(myPot, 'optim_D'):
            myPot.optim_D.zero_grad()
        
        ## DEBUG LINE CHECK DEVICES
        # print("Xs.device: ", Xs.device)
        # print("Xs[:, 0:1].device: ", Xs[:, 0:1].device)
        
        # Compute loss
        myPot.calf(Xs, ts)
        loss = loss_fn(fs_targ, myPot.fs, ts, p)
        Losses.append(loss)
        
        # Update the model parameters
        if update_weights:
            loss.backward()
            myPot.optim_W.step()
        
            if hasattr(myPot, 'optim_D'):
                myPot.optim_D.step()
        
        
    res = sum(Losses) / len(data_loader.dataset)
    print("Memory before del in train1Epoch: ")
    memory_stats()

    del Xs, ts, fs_targ, Losses
    torch.cuda.empty_cache()

    print("Memory after del in train1Epoch: ")
    memory_stats()
    return res



In [20]:
# Initialize dataloaders
AllData = TensorDataset(
    Xs, 
    ts, 
    fs
)

dataloader_kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {}
# train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, **kwargs)

train_len = int(len(Vs) * 0.8)
test_len = len(Vs) - train_len
trainDataset, testDataset = torch.utils.data.random_split(AllData, [train_len, test_len])

# Make an optuna function

In [21]:
class OptunaObj:
    # Initialize
    def __init__(self, kwgs):
        self.dim_xi = kwgs['dim_xi']
        self.test_p = kwgs['test_p']
        self.test_batch_size = kwgs['test_batch_size']
        self.device = kwgs['device']
        self.training_dataset = kwgs['training_dataset']
        self.test_dataset = kwgs['test_dataset']
        
        
    # Define the objective
    def objective(self, trial):
        # Dump for un-saved interuptions
        joblib.dump(this_study, "./data/study_dim_xi_DLeg_" + str(self.dim_xi) + ".pkl")

        # Fixed parameters
        dim_xi = self.dim_xi
        NNs_D = []
        test_p = self.test_p
        test_batch_size = self.test_batch_size

        # Define NN for W
        W_layers = trial.suggest_int('W_layers', 2, 5)
        NNs_W = []
        for i in range(W_layers):
            this_W = 2 ** trial.suggest_int('W_layer_units_exponent_{}'.format(i), 4, 10)
            NNs_W.append(this_W)
            
        # Define NN for D
        D_layers = trial.suggest_int('D_layers', 2, 5)
        NNs_D = []
        for i in range(D_layers):
            this_D = 2 ** trial.suggest_int('D_layer_units_exponent_{}'.format(i), 4, 10)
            NNs_D.append(this_D)

        # Suggest learning rate
        learning_rate = 10 ** trial.suggest_float('log_learning_rate', -5., -1.)
        
        # Suggest learning rate for D
        learning_rate_D = 10 ** trial.suggest_float('log_learning_rate_D', -5., -1.)

        # Suggest batchsize
        training_batch_size = 2 ** trial.suggest_int('training_batch_size', 6, 12)

        # Suggest training p
        training_p = trial.suggest_int('training_p', 2, 8)

        # Suggest training epochs
        # training_epochs = 2 ** trial.suggest_int('training_epoch_exponents', 5, 9)
        training_epochs = 100

        params = {
            'dim_xi' : dim_xi, 
            'NNs_W' : NNs_W, 
            'NNs_D' : NNs_D, 
            'learning_rate' : learning_rate, 
            'learning_rate_D' : learning_rate_D, 
            'training_batch_size' : training_batch_size, 
            'training_p' : training_p, 
            'training_epochs' : training_epochs, 
            'device' : self.device,
        }
        
        
        # Set training dataloader
        training_batch_size = params['training_batch_size'] #1024
        trainDataLoader = DataLoader(
            self.training_dataset,
            batch_size = training_batch_size,
            shuffle = True,
        #    num_workers = 16,
            collate_fn = None,
            **dataloader_kwargs, 
        )

        # Set testing data loader
        testing_batch_size = self.test_batch_size # 256
        testDataLoader = DataLoader(
            self.test_dataset,
            batch_size = testing_batch_size,
            shuffle = True,
        #    num_workers = 16,
            collate_fn = None,
            **dataloader_kwargs, 
        )
        
        # Print out info
        print("-"*20, " Trial ", str(trial.number), " ", "-"*20, flush=True)
        st = time.time()
        print("Start timing: ")
        
        print("Parameters: ", flush=True)
        print(trial.params, flush=True)
        
        # Training
        myWD = PotentialsFricCorrection(params)
        for i in range(params['training_epochs']):
            avg_training_loss = train1Epoch(trainDataLoader, Loss, myWD, params['training_p'])
            
            if torch.isnan(avg_training_loss):
                break
            
            if i % 10 == 0:
                # avg_test_loss = train1Epoch(testDataLoader, Loss, myWD, self.test_p, update_weights=False)
                print("\t", "epoch ", str(i), "training error: ", str(avg_training_loss), flush=True)
        
        # Return objective value for optuna
        res = train1Epoch(testDataLoader, Loss, myWD, self.test_p, update_weights=False)
        print("Time for this trial: ", time.time() - st)
        # Release GPU memory
        del myWD
        gc.collect()
        torch.cuda.empty_cache()
        
        ## Print memory status
        print("Memory status after this trial: ")
        memory_stats()
        
        return res
            
            

In [22]:
# Do a parametric study over number of hidden parameters
dim_xis = [1, 2, 4]
studys = []

# Tune parameters for dim_xi = 4
OptKwgs = {
    'dim_xi' : 4, 
    'test_p' : 2, 
    'test_batch_size' : len(testDataset), 
    'device' : device, 
    'training_dataset' : trainDataset, 
    'test_dataset' : testDataset, 
}

# Loop through all dim_xis
for dim_xi in dim_xis:
    OptKwgs['dim_xi'] = dim_xi
    myOpt = OptunaObj(OptKwgs)
    this_study = optuna.create_study(direction='minimize')
    this_study.optimize(myOpt.objective, n_trials=50)
    studys.append(this_study)


[I 2023-10-26 22:05:02,998] A new study created in memory with name: no-name-9baa6280-af84-4488-b792-facfaaa9f199


--------------------  Trial  0   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 4, 'D_layers': 5, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 9, 'D_layer_units_exponent_2': 6, 'D_layer_units_exponent_3': 9, 'D_layer_units_exponent_4': 9, 'log_learning_rate': -4.1183890496934845, 'log_learning_rate_D': -4.14126284747863, 'training_batch_size': 7, 'training_p': 8}
Memory before del in train1Epoch: 
Memory allocated:  1793.4306640625
Memory cached:  3252.0
Memory after del in train1Epoch: 
Memory allocated:  1793.24462890625
Memory cached:  1848.0
	 epoch  0 training error:  tensor(1.1323, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1793.43798828125
Memory cached:  3102.0
Memory after del in train1Epoch: 
Memory allocated:  1793.251953125
Memory cached:  1848.0
Memory before del in train1Epoch: 
Memory allocated:  1793.43798828125
Memory cached:  3

[I 2023-10-26 22:33:55,370] Trial 0 finished with value: 0.19846731424331665 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 4, 'D_layers': 5, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 9, 'D_layer_units_exponent_2': 6, 'D_layer_units_exponent_3': 9, 'D_layer_units_exponent_4': 9, 'log_learning_rate': -4.1183890496934845, 'log_learning_rate_D': -4.14126284747863, 'training_batch_size': 7, 'training_p': 8}. Best is trial 0 with value: 0.19846731424331665.


Memory status after this trial: 
Memory allocated:  3809.00244140625
Memory cached:  3866.0
--------------------  Trial  1   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 5, 'D_layers': 3, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 4, 'D_layer_units_exponent_2': 5, 'log_learning_rate': -4.912670568368801, 'log_learning_rate_D': -4.657566056967868, 'training_batch_size': 8, 'training_p': 4}
Memory before del in train1Epoch: 
Memory allocated:  1784.51904296875
Memory cached:  3866.0
Memory after del in train1Epoch: 
Memory allocated:  1784.33447265625
Memory cached:  1840.0
	 epoch  0 training error:  tensor(1.1148, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1784.5263671875
Memory cached:  2400.0
Memory after del in train1Epoch: 
Memory allocated:  1784.341796875
Memory cached:  1840.0
Memory before del in train1

[I 2023-10-26 22:50:33,140] Trial 1 finished with value: 0.20141993463039398 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 5, 'D_layers': 3, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 4, 'D_layer_units_exponent_2': 5, 'log_learning_rate': -4.912670568368801, 'log_learning_rate_D': -4.657566056967868, 'training_batch_size': 8, 'training_p': 4}. Best is trial 0 with value: 0.19846731424331665.


Memory status after this trial: 
Memory allocated:  2251.10498046875
Memory cached:  2260.0
--------------------  Trial  2   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 4, 'W_layer_units_exponent_3': 5, 'D_layers': 2, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 7, 'log_learning_rate': -4.917961379596061, 'log_learning_rate_D': -1.279257125933483, 'training_batch_size': 7, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1784.8505859375
Memory cached:  2260.0
Memory after del in train1Epoch: 
Memory allocated:  1784.66455078125
Memory cached:  1840.0
	 epoch  0 training error:  tensor(0.9712, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1784.85791015625
Memory cached:  2070.0
Memory after del in train1Epoch: 
Memory allocated:  1784.671875
Memory cached:  1840.0
Memory before del in train1Epo

[I 2023-10-26 23:20:21,825] Trial 2 finished with value: 0.8290948271751404 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 4, 'W_layer_units_exponent_3': 5, 'D_layers': 2, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 7, 'log_learning_rate': -4.917961379596061, 'log_learning_rate_D': -1.279257125933483, 'training_batch_size': 7, 'training_p': 3}. Best is trial 0 with value: 0.19846731424331665.


Memory status after this trial: 
Memory allocated:  2214.33154296875
Memory cached:  2224.0
--------------------  Trial  3   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 5, 'W_layer_units_exponent_3': 7, 'D_layers': 5, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 10, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 6, 'D_layer_units_exponent_4': 9, 'log_learning_rate': -3.263046259865143, 'log_learning_rate_D': -4.8338829023600365, 'training_batch_size': 7, 'training_p': 8}
Memory before del in train1Epoch: 
Memory allocated:  1801.7158203125
Memory cached:  3780.0
Memory after del in train1Epoch: 
Memory allocated:  1801.52978515625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.5518, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1801.72314453125
Memory cached:  3780.0
Memory after del in tr

[I 2023-10-26 23:55:35,656] Trial 3 finished with value: 0.20140330493450165 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 5, 'W_layer_units_exponent_3': 7, 'D_layers': 5, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 10, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 6, 'D_layer_units_exponent_4': 9, 'log_learning_rate': -3.263046259865143, 'log_learning_rate_D': -4.8338829023600365, 'training_batch_size': 7, 'training_p': 8}. Best is trial 0 with value: 0.19846731424331665.


Memory status after this trial: 
Memory allocated:  4831.62646484375
Memory cached:  4878.0
--------------------  Trial  4   --------------------
Start timing: 
Parameters: 
{'W_layers': 5, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 4, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 5, 'W_layer_units_exponent_4': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 4, 'D_layer_units_exponent_1': 7, 'log_learning_rate': -4.271964066438514, 'log_learning_rate_D': -2.515723439447328, 'training_batch_size': 9, 'training_p': 4}
Memory before del in train1Epoch: 
Memory allocated:  1786.59228515625
Memory cached:  4878.0
Memory after del in train1Epoch: 
Memory allocated:  1784.94384765625
Memory cached:  1840.0
	 epoch  0 training error:  tensor(1.0453, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1786.599609375
Memory cached:  4166.0
Memory after del in train1Epoch: 
Memory allocated:  1784.951171875
Memory cached:  1840

[I 2023-10-27 00:06:48,567] Trial 4 finished with value: 0.19848953187465668 and parameters: {'W_layers': 5, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 4, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 5, 'W_layer_units_exponent_4': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 4, 'D_layer_units_exponent_1': 7, 'log_learning_rate': -4.271964066438514, 'log_learning_rate_D': -2.515723439447328, 'training_batch_size': 9, 'training_p': 4}. Best is trial 0 with value: 0.19846731424331665.


Memory status after this trial: 
Memory allocated:  2702.87451171875
Memory cached:  2714.0
--------------------  Trial  5   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 6, 'D_layers': 4, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 10, 'D_layer_units_exponent_2': 8, 'D_layer_units_exponent_3': 10, 'log_learning_rate': -2.1831828311227013, 'log_learning_rate_D': -1.6680264742444773, 'training_batch_size': 7, 'training_p': 5}
Memory before del in train1Epoch: 
Memory allocated:  1796.4111328125
Memory cached:  3150.0
Memory after del in train1Epoch: 
Memory allocated:  1796.22509765625
Memory cached:  1840.0
	 epoch  0 training error:  tensor(0.7158, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1796.41845703125
Memory cached:  3150.0
Memory after del in train1Epoch: 
Memory allocated:  1796.232421875
Memory cached: 

[I 2023-10-27 00:37:14,258] Trial 5 finished with value: 0.20143163204193115 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 6, 'D_layers': 4, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 10, 'D_layer_units_exponent_2': 8, 'D_layer_units_exponent_3': 10, 'log_learning_rate': -2.1831828311227013, 'log_learning_rate_D': -1.6680264742444773, 'training_batch_size': 7, 'training_p': 5}. Best is trial 0 with value: 0.19846731424331665.


--------------------  Trial  6   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 4, 'W_layer_units_exponent_3': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 9, 'log_learning_rate': -2.5921864465905635, 'log_learning_rate_D': -4.477701513715461, 'training_batch_size': 10, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1791.7734375
Memory cached:  12292.0
Memory after del in train1Epoch: 
Memory allocated:  1787.19580078125
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.9384, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.81005859375
Memory cached:  11460.0
Memory after del in train1Epoch: 
Memory allocated:  1787.232421875
Memory cached:  1856.0
Memory before del in train1Epoch: 
Memory allocated:  1791.78076171875
Memory cached:  11480.0
Memory after del in tr

[I 2023-10-27 00:43:33,836] Trial 6 finished with value: 0.19913999736309052 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 4, 'W_layer_units_exponent_3': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 9, 'log_learning_rate': -2.5921864465905635, 'log_learning_rate_D': -4.477701513715461, 'training_batch_size': 10, 'training_p': 3}. Best is trial 0 with value: 0.19846731424331665.


Memory status after this trial: 
Memory allocated:  4057.73583984375
Memory cached:  4080.0
--------------------  Trial  7   --------------------
Start timing: 
Parameters: 
{'W_layers': 5, 'W_layer_units_exponent_0': 9, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 5, 'W_layer_units_exponent_3': 6, 'W_layer_units_exponent_4': 4, 'D_layers': 5, 'D_layer_units_exponent_0': 5, 'D_layer_units_exponent_1': 9, 'D_layer_units_exponent_2': 6, 'D_layer_units_exponent_3': 5, 'D_layer_units_exponent_4': 8, 'log_learning_rate': -2.8638235773737666, 'log_learning_rate_D': -3.795362932770152, 'training_batch_size': 9, 'training_p': 4}
Memory before del in train1Epoch: 
Memory allocated:  1788.32666015625
Memory cached:  6492.0
Memory after del in train1Epoch: 
Memory allocated:  1786.67822265625
Memory cached:  1840.0
	 epoch  0 training error:  tensor(0.8740, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1788.333984375
Memory cached:  6

[I 2023-10-27 00:57:09,091] Trial 7 finished with value: 0.20179858803749084 and parameters: {'W_layers': 5, 'W_layer_units_exponent_0': 9, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 5, 'W_layer_units_exponent_3': 6, 'W_layer_units_exponent_4': 4, 'D_layers': 5, 'D_layer_units_exponent_0': 5, 'D_layer_units_exponent_1': 9, 'D_layer_units_exponent_2': 6, 'D_layer_units_exponent_3': 5, 'D_layer_units_exponent_4': 8, 'log_learning_rate': -2.8638235773737666, 'log_learning_rate_D': -3.795362932770152, 'training_batch_size': 9, 'training_p': 4}. Best is trial 0 with value: 0.19846731424331665.


Memory status after this trial: 
Memory allocated:  3612.01806640625
Memory cached:  3628.0
--------------------  Trial  8   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 9, 'W_layer_units_exponent_1': 8, 'D_layers': 5, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 9, 'D_layer_units_exponent_3': 10, 'D_layer_units_exponent_4': 8, 'log_learning_rate': -2.4790426316739667, 'log_learning_rate_D': -1.3698262366307992, 'training_batch_size': 11, 'training_p': 4}
Memory before del in train1Epoch: 
Memory allocated:  1804.5703125
Memory cached:  12686.0
Memory after del in train1Epoch: 
Memory allocated:  1799.99267578125
Memory cached:  1876.0
	 epoch  0 training error:  tensor(1.1872, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1804.63232421875
Memory cached:  12692.0
Memory after del in train1Epoch: 
Memory allocated:  1799.970703125
Memory cached:  

[I 2023-10-27 01:03:55,367] Trial 8 finished with value: 0.20184087753295898 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 9, 'W_layer_units_exponent_1': 8, 'D_layers': 5, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 9, 'D_layer_units_exponent_3': 10, 'D_layer_units_exponent_4': 8, 'log_learning_rate': -2.4790426316739667, 'log_learning_rate_D': -1.3698262366307992, 'training_batch_size': 11, 'training_p': 4}. Best is trial 0 with value: 0.19846731424331665.


4355.08642578125
Memory cached:  4454.0
--------------------  Trial  9   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 10, 'W_layer_units_exponent_3': 8, 'D_layers': 5, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 9, 'D_layer_units_exponent_4': 8, 'log_learning_rate': -3.69648123442084, 'log_learning_rate_D': -4.702209400873736, 'training_batch_size': 6, 'training_p': 4}
Memory before del in train1Epoch: 
Memory allocated:  1791.669921875
Memory cached:  4454.0
Memory after del in train1Epoch: 
Memory allocated:  1791.48095703125
Memory cached:  1840.0
	 epoch  0 training error:  tensor(0.3531, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.68310546875
Memory cached:  3176.0
Memory after del in train1Epoch: 
Memory allocated:  1791.494140625
Memory ca

[I 2023-10-27 02:08:45,091] Trial 9 finished with value: 0.22084471583366394 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 10, 'W_layer_units_exponent_3': 8, 'D_layers': 5, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 9, 'D_layer_units_exponent_4': 8, 'log_learning_rate': -3.69648123442084, 'log_learning_rate_D': -4.702209400873736, 'training_batch_size': 6, 'training_p': 4}. Best is trial 0 with value: 0.19846731424331665.


Memory status after this trial: 
Memory allocated:  6074.11474609375
Memory cached:  6170.0
--------------------  Trial  10   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 8, 'log_learning_rate': -1.5009377783999849, 'log_learning_rate_D': -3.47110139494725, 'training_batch_size': 12, 'training_p': 8}
Memory before del in train1Epoch: 
Memory allocated:  1795.337890625
Memory cached:  9530.0
Memory after del in train1Epoch: 
Memory allocated:  1790.76025390625
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.3512, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1795.4111328125
Memory cached:  8204.0
Memory after del in train1Epoch: 
Memory allocated:  1790.767578125
Memory cached:  1856.0
Memory before del in train

[I 2023-10-27 02:14:58,876] Trial 10 finished with value: 0.1983436495065689 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 8, 'log_learning_rate': -1.5009377783999849, 'log_learning_rate_D': -3.47110139494725, 'training_batch_size': 12, 'training_p': 8}. Best is trial 10 with value: 0.1983436495065689.


Memory status after this trial: 
Memory allocated:  3301.11962890625
Memory cached:  3352.0
--------------------  Trial  11   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 10, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 8, 'log_learning_rate': -1.1736981137043894, 'log_learning_rate_D': -3.560955128138543, 'training_batch_size': 12, 'training_p': 8}
Memory before del in train1Epoch: 
Memory allocated:  1796.2998046875
Memory cached:  12606.0
Memory after del in train1Epoch: 
Memory allocated:  1791.72216796875
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.1046, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1796.30712890625
Memory cached:  12610.0
Memory after del in train1Epoch: 
Memory allocated:  1791.7294921875
Memory cached:  1856.0
Memory before del i

[I 2023-10-27 02:21:33,044] Trial 11 finished with value: 0.20136669278144836 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 10, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 8, 'log_learning_rate': -1.1736981137043894, 'log_learning_rate_D': -3.560955128138543, 'training_batch_size': 12, 'training_p': 8}. Best is trial 10 with value: 0.1983436495065689.


Memory cached:  4404.0
--------------------  Trial  12   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.0093073194842321, 'log_learning_rate_D': -3.219297207351273, 'training_batch_size': 12, 'training_p': 7}
Memory before del in train1Epoch: 
Memory allocated:  1795.466796875
Memory cached:  9048.0
Memory after del in train1Epoch: 
Memory allocated:  1790.88916015625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.7665, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1795.5400390625
Memory cached:  9062.0
Memory after del in train1Epoch: 
Memory allocated:  1790.896484375
Memory cached:  1858.0
Memory before del in train1Epoch: 
Memory allocated:  1795.5400390625
Memory cached:  9062.0
M

[W 2023-10-27 02:22:27,997] Trial 12 failed with parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.0093073194842321, 'log_learning_rate_D': -3.219297207351273, 'training_batch_size': 12, 'training_p': 7} because of the following error: The value nan is not acceptable.
[W 2023-10-27 02:22:28,000] Trial 12 failed with value tensor(nan, device='cuda:0', grad_fn=<DivBackward0>).


--------------------  Trial  13   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.0865258912827027, 'log_learning_rate_D': -3.1517494011332725, 'training_batch_size': 12, 'training_p': 7}
Memory before del in train1Epoch: 
Memory allocated:  1795.466796875
Memory cached:  9048.0
Memory after del in train1Epoch: 
Memory allocated:  1790.88916015625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.8716, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1795.5400390625
Memory cached:  9062.0
Memory after del in train1Epoch: 
Memory allocated:  1790.896484375
Memory cached:  1858.0
Memory before del in train1Epoch: 
Memory allocated:  1795.5400390625
Memory cached:  9062.0
Memory after del in tra

[W 2023-10-27 02:23:06,709] Trial 13 failed with parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.0865258912827027, 'log_learning_rate_D': -3.1517494011332725, 'training_batch_size': 12, 'training_p': 7} because of the following error: The value nan is not acceptable.
[W 2023-10-27 02:23:06,712] Trial 13 failed with value tensor(nan, device='cuda:0', grad_fn=<DivBackward0>).


Memory status after this trial: 
Memory allocated:  3496.69189453125
Memory cached:  3586.0
--------------------  Trial  14   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.1049481648898598, 'log_learning_rate_D': -3.291574500453016, 'training_batch_size': 12, 'training_p': 7}
Memory before del in train1Epoch: 
Memory allocated:  1795.466796875
Memory cached:  9048.0
Memory after del in train1Epoch: 
Memory allocated:  1790.88916015625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(1.0079, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1795.5400390625
Memory cached:  9062.0
Memory after del in train1Epoch: 
Memory allocated:  1790.896484375
Memory cached:  1858.0
Memory before del in trai

[W 2023-10-27 02:24:25,783] Trial 14 failed with parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.1049481648898598, 'log_learning_rate_D': -3.291574500453016, 'training_batch_size': 12, 'training_p': 7} because of the following error: The value nan is not acceptable.
[W 2023-10-27 02:24:25,785] Trial 14 failed with value tensor(nan, device='cuda:0', grad_fn=<DivBackward0>).


Memory before del in train1Epoch: 
Memory allocated:  3500.87451171875
Memory cached:  3608.0
Memory after del in train1Epoch: 
Memory allocated:  3499.72900390625
Memory cached:  3586.0
Time for this trial:  78.68707752227783
Memory status after this trial: 
Memory allocated:  3496.69189453125
Memory cached:  3586.0
--------------------  Trial  15   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.1451683987748695, 'log_learning_rate_D': -3.254603616402931, 'training_batch_size': 12, 'training_p': 7}
Memory before del in train1Epoch: 
Memory allocated:  1795.466796875
Memory cached:  9048.0
Memory after del in train1Epoch: 
Memory allocated:  1790.88916015625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.7514, device='cuda:0', grad_f

[I 2023-10-27 02:31:13,978] Trial 15 finished with value: 0.20303156971931458 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.1451683987748695, 'log_learning_rate_D': -3.254603616402931, 'training_batch_size': 12, 'training_p': 7}. Best is trial 10 with value: 0.1983436495065689.


Memory before del in train1Epoch: 
Memory allocated:  3500.87451171875
Memory cached:  3608.0
Memory after del in train1Epoch: 
Memory allocated:  3499.72900390625
Memory cached:  3586.0
Time for this trial:  407.8144783973694
Memory status after this trial: 
Memory allocated:  3496.69189453125
Memory cached:  3586.0
--------------------  Trial  16   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 8, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 10, 'log_learning_rate': -1.8877360683921491, 'log_learning_rate_D': -3.9862757904883015, 'training_batch_size': 10, 'training_p': 7}
Memory before del in train1Epoch: 
Memory allocated:  1797.4765625
Memory cached:  11356.0
Memory after del in train1Epoch: 
Memory allocated:  1792.89892578125
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.5531, device='cuda:0', grad_f

[I 2023-10-27 02:38:57,963] Trial 16 finished with value: 0.20092883706092834 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 8, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 10, 'log_learning_rate': -1.8877360683921491, 'log_learning_rate_D': -3.9862757904883015, 'training_batch_size': 10, 'training_p': 7}. Best is trial 10 with value: 0.1983436495065689.


Memory before del in train1Epoch: 
Memory allocated:  3941.94677734375
Memory cached:  4026.0
Memory after del in train1Epoch: 
Memory allocated:  3940.80126953125
Memory cached:  4004.0
Time for this trial:  463.6061849594116
Memory status after this trial: 
Memory allocated:  3936.75927734375
Memory cached:  4004.0
--------------------  Trial  17   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 4, 'D_layers': 4, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 6, 'D_layer_units_exponent_3': 8, 'log_learning_rate': -3.4644440698220462, 'log_learning_rate_D': -2.862026331466451, 'training_batch_size': 6, 'training_p': 6}
Memory before del in train1Epoch: 
Memory allocated:  1785.23046875
Memory cached:  4004.0
Memory after del in train1Epoch: 
Memory allocated:  1785.04150390625
Memory cached:  1840.0
	 epoch  0 training error:  tensor(0.4713, device='cuda:0', grad_fn=<

[I 2023-10-27 03:28:41,164] Trial 17 finished with value: 0.19930888712406158 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 4, 'D_layers': 4, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 6, 'D_layer_units_exponent_3': 8, 'log_learning_rate': -3.4644440698220462, 'log_learning_rate_D': -2.862026331466451, 'training_batch_size': 6, 'training_p': 6}. Best is trial 10 with value: 0.1983436495065689.


--------------------  Trial  18   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 6, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 9, 'D_layer_units_exponent_2': 6, 'log_learning_rate': -3.957998701122873, 'log_learning_rate_D': -4.121993411644265, 'training_batch_size': 8, 'training_p': 7}
Memory before del in train1Epoch: 
Memory allocated:  1788.89990234375
Memory cached:  3118.0
Memory after del in train1Epoch: 
Memory allocated:  1788.71533203125
Memory cached:  1840.0
	 epoch  0 training error:  tensor(0.9664, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1788.9072265625
Memory cached:  3118.0
Memory after del in train1Epoch: 
Memory allocated:  1788.72265625
Memory cached:  1842.0
Memory before del in train1Epoch: 
Memory allocated:  1788.9072265625
Memory cached:  3118.0
Memory after del in train1Epoch: 
Memory allocated:  1788

[I 2023-10-27 03:44:10,883] Trial 18 finished with value: 0.19839218258857727 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 6, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 9, 'D_layer_units_exponent_2': 6, 'log_learning_rate': -3.957998701122873, 'log_learning_rate_D': -4.121993411644265, 'training_batch_size': 8, 'training_p': 7}. Best is trial 10 with value: 0.1983436495065689.


--------------------  Trial  19   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 10, 'D_layers': 3, 'D_layer_units_exponent_0': 6, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 8, 'log_learning_rate': -1.824085044983161, 'log_learning_rate_D': -3.3316745558594083, 'training_batch_size': 10, 'training_p': 6}
Memory before del in train1Epoch: 
Memory allocated:  1792.525390625
Memory cached:  9474.0
Memory after del in train1Epoch: 
Memory allocated:  1787.94775390625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.9598, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1792.5986328125
Memory cached:  9476.0
Memory after del in train1Epoch: 
Memory allocated:  1787.955078125
Memory cached:  1858.0
Memory before del in train1Epoch: 
Memory allocated:  1792.5986328125
Memory cached:  9476.0
Memory after del in trai

[I 2023-10-27 03:50:43,523] Trial 19 finished with value: 0.1984843760728836 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 10, 'D_layers': 3, 'D_layer_units_exponent_0': 6, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 8, 'log_learning_rate': -1.824085044983161, 'log_learning_rate_D': -3.3316745558594083, 'training_batch_size': 10, 'training_p': 6}. Best is trial 10 with value: 0.1983436495065689.


Memory status after this trial: 
Memory allocated:  3594.34228515625
Memory cached:  3652.0
--------------------  Trial  20   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 8, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 10, 'D_layer_units_exponent_2': 8, 'log_learning_rate': -3.0628949337120344, 'log_learning_rate_D': -4.200056062216468, 'training_batch_size': 8, 'training_p': 7}
Memory before del in train1Epoch: 
Memory allocated:  1796.47802734375
Memory cached:  4054.0
Memory after del in train1Epoch: 
Memory allocated:  1796.29345703125
Memory cached:  1860.0
	 epoch  0 training error:  tensor(0.7517, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1796.4853515625
Memory cached:  4054.0
Memory after del in train1Epoch: 
Memory allocated:  1796.30078125
Memory cached:  1860.0
Memory before del in train1Epoch: 
Memory allocated:  17

[I 2023-10-27 04:06:43,360] Trial 20 finished with value: 0.2019694447517395 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 8, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 10, 'D_layer_units_exponent_2': 8, 'log_learning_rate': -3.0628949337120344, 'log_learning_rate_D': -4.200056062216468, 'training_batch_size': 8, 'training_p': 7}. Best is trial 10 with value: 0.1983436495065689.


3499.73583984375
Memory cached:  3580.0
--------------------  Trial  21   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'D_layers': 3, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'log_learning_rate': -1.541729102957722, 'log_learning_rate_D': -3.7481861229868474, 'training_batch_size': 11, 'training_p': 6}
Memory before del in train1Epoch: 
Memory allocated:  1791.8046875
Memory cached:  7688.0
Memory after del in train1Epoch: 
Memory allocated:  1787.22705078125
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.2047, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.8779296875
Memory cached:  7690.0
Memory after del in train1Epoch: 
Memory allocated:  1787.234375
Memory cached:  1856.0
Memory before del in train1Epoch: 
Memory allocated:  1791.8779296875
Memory cache

[I 2023-10-27 04:12:56,080] Trial 21 finished with value: 0.1982005089521408 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'D_layers': 3, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'log_learning_rate': -1.541729102957722, 'log_learning_rate_D': -3.7481861229868474, 'training_batch_size': 11, 'training_p': 6}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  3178.11279296875
Memory cached:  3208.0
--------------------  Trial  22   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 5, 'log_learning_rate': -1.426626963903669, 'log_learning_rate_D': -2.8520471205691678, 'training_batch_size': 11, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1791.822265625
Memory cached:  7790.0
Memory after del in train1Epoch: 
Memory allocated:  1787.24462890625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.7869, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.8955078125
Memory cached:  7792.0
Memory after del in train1Epoch: 
Memory allocated:  1787.251953125
Memory cached:  1

[I 2023-10-27 04:19:25,213] Trial 22 finished with value: 0.19836921989917755 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 5, 'log_learning_rate': -1.426626963903669, 'log_learning_rate_D': -2.8520471205691678, 'training_batch_size': 11, 'training_p': 2}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  3204.84716796875
Memory cached:  3254.0
Memory after del in train1Epoch: 
Memory allocated:  3203.70166015625
Memory cached:  3232.0
Time for this trial:  388.7357838153839
Memory status after this trial: 
Memory allocated:  3202.48681640625
Memory cached:  3232.0
--------------------  Trial  23   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 6, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'log_learning_rate': -1.6419242027225518, 'log_learning_rate_D': -3.692357357081483, 'training_batch_size': 11, 'training_p': 6}
Memory before del in train1Epoch: 
Memory allocated:  1791.404296875
Memory cached:  6612.0
Memory after del in train1Epoch: 
Memory allocated:  1786.82666015625
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.0467, device='cuda:0', grad_fn

[I 2023-10-27 04:24:55,268] Trial 23 finished with value: 0.2020144760608673 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 6, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'log_learning_rate': -1.6419242027225518, 'log_learning_rate_D': -3.692357357081483, 'training_batch_size': 11, 'training_p': 6}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  2861.99462890625
Memory cached:  2874.0
--------------------  Trial  24   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 4, 'log_learning_rate': -1.4684192895990413, 'log_learning_rate_D': -2.8290235203395078, 'training_batch_size': 11, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1791.814453125
Memory cached:  7712.0
Memory after del in train1Epoch: 
Memory allocated:  1787.23681640625
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.9989, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.8876953125
Memory cached:  7714.0
Memory after del in train1Epoch: 
Memory allocated:  1787.244140625
Memory cached:  

[I 2023-10-27 04:31:21,006] Trial 24 finished with value: 0.1982765942811966 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 4, 'log_learning_rate': -1.4684192895990413, 'log_learning_rate_D': -2.8290235203395078, 'training_batch_size': 11, 'training_p': 2}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  3190.30029296875
Memory cached:  3208.0
--------------------  Trial  25   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 9, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 4, 'log_learning_rate': -1.4578013652998167, 'log_learning_rate_D': -2.4588109544589143, 'training_batch_size': 12, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1792.859375
Memory cached:  9728.0
Memory after del in train1Epoch: 
Memory allocated:  1788.28173828125
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.9794, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1792.86669921875
Memory cached:  9732.0
Memory after del in train1Epoch: 
Memory allocated:  1788.2890625
Memory cached:  1856

[W 2023-10-27 04:32:06,204] Trial 25 failed with parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 9, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 4, 'log_learning_rate': -1.4578013652998167, 'log_learning_rate_D': -2.4588109544589143, 'training_batch_size': 12, 'training_p': 2} because of the following error: The value nan is not acceptable.
[W 2023-10-27 04:32:06,207] Trial 25 failed with value tensor(nan, device='cuda:0', grad_fn=<DivBackward0>).


Memory before del in train1Epoch: 
Memory allocated:  3645.43505859375
Memory cached:  3690.0
Memory after del in train1Epoch: 
Memory allocated:  3644.28955078125
Memory cached:  3668.0
Time for this trial:  44.71348762512207
Memory status after this trial: 
Memory allocated:  3642.55615234375
Memory cached:  3668.0
--------------------  Trial  26   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 9, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 4, 'log_learning_rate': -1.0750907642609953, 'log_learning_rate_D': -2.389920167829458, 'training_batch_size': 12, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1792.859375
Memory cached:  9728.0
Memory after del in train1Epoch: 
Memory allocated:  1788.28173828125
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.8

[I 2023-10-27 04:38:36,289] Trial 26 finished with value: 2.966290235519409 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 9, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 4, 'log_learning_rate': -1.0750907642609953, 'log_learning_rate_D': -2.389920167829458, 'training_batch_size': 12, 'training_p': 2}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  3645.43505859375
Memory cached:  3690.0
Memory after del in train1Epoch: 
Memory allocated:  3644.28955078125
Memory cached:  3668.0
Time for this trial:  389.6796524524689
Memory status after this trial: 
Memory allocated:  3642.55615234375
Memory cached:  3668.0
--------------------  Trial  27   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 7, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.5502016544065307, 'log_learning_rate_D': -3.324271642716739, 'training_batch_size': 11, 'training_p': 5}
Memory before del in train1Epoch: 
Memory allocated:  1792.646484375
Memory cached:  7684.0
Memory after del in train1Epoch: 
Memory allocated:  1788.06884765625
Memory cached:  1856.0
	 epoch  0 training error:  tensor(

[I 2023-10-27 04:45:00,659] Trial 27 finished with value: 0.2005966454744339 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 7, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 7, 'log_learning_rate': -1.5502016544065307, 'log_learning_rate_D': -3.324271642716739, 'training_batch_size': 11, 'training_p': 5}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  3178.24072265625
Memory cached:  3208.0
--------------------  Trial  28   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 9, 'W_layer_units_exponent_3': 10, 'D_layers': 4, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 4, 'log_learning_rate': -2.072617040772648, 'log_learning_rate_D': -3.1123165457131154, 'training_batch_size': 12, 'training_p': 5}
Memory before del in train1Epoch: 
Memory allocated:  1799.20947265625
Memory cached:  12428.0
Memory after del in train1Epoch: 
Memory allocated:  1794.6318359375
Memory cached:  1896.0
	 epoch  0 training error:  tensor(1.0363, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1799.23193359375
Memory cached:  12428.0
Memory after del in train1Epoch: 
Memory allocated

[I 2023-10-27 04:52:25,012] Trial 28 finished with value: 0.2012248933315277 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 9, 'W_layer_units_exponent_3': 10, 'D_layers': 4, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 4, 'log_learning_rate': -2.072617040772648, 'log_learning_rate_D': -3.1123165457131154, 'training_batch_size': 12, 'training_p': 5}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  4293.64599609375
Memory cached:  4390.0
--------------------  Trial  29   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 9, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 4, 'D_layer_units_exponent_2': 5, 'log_learning_rate': -1.4625308187900743, 'log_learning_rate_D': -3.5886988515781235, 'training_batch_size': 11, 'training_p': 6}
Memory before del in train1Epoch: 
Memory allocated:  1794.466796875
Memory cached:  11890.0
Memory after del in train1Epoch: 
Memory allocated:  1789.88916015625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.7186, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1794.583984375
Memory cached:  9578.0
Memory after del in train1Epoch: 
Memory allocated:  1789.896484375
Memory cached:  1858.0
Memory before del in trai

[I 2023-10-27 04:57:47,635] Trial 29 finished with value: 0.1986096203327179 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 9, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 4, 'D_layer_units_exponent_2': 5, 'log_learning_rate': -1.4625308187900743, 'log_learning_rate_D': -3.5886988515781235, 'training_batch_size': 11, 'training_p': 6}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  3607.54443359375
Memory cached:  3684.0
--------------------  Trial  30   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 7, 'D_layer_units_exponent_2': 9, 'D_layer_units_exponent_3': 6, 'log_learning_rate': -1.998212795287698, 'log_learning_rate_D': -2.63895412644799, 'training_batch_size': 10, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1793.96484375
Memory cached:  8868.0
Memory after del in train1Epoch: 
Memory allocated:  1789.38720703125
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.7162, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1793.97216796875
Memory cached:  8872.0
Memory after del in train1Epoch: 
Memory allocated:  1789.39453125
Memory cached:  1858.0
Memory before del in train1E

[I 2023-10-27 05:03:40,878] Trial 30 finished with value: 0.20190055668354034 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 7, 'D_layer_units_exponent_2': 9, 'D_layer_units_exponent_3': 6, 'log_learning_rate': -1.998212795287698, 'log_learning_rate_D': -2.63895412644799, 'training_batch_size': 10, 'training_p': 3}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  3450.25146484375
Memory cached:  3522.0
Memory after del in train1Epoch: 
Memory allocated:  3449.10595703125
Memory cached:  3500.0
Time for this trial:  352.83443427085876
Memory status after this trial: 
Memory allocated:  3446.81982421875
Memory cached:  3500.0
--------------------  Trial  31   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 6, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 6, 'log_learning_rate': -2.2955261209470477, 'log_learning_rate_D': -2.1751920304843035, 'training_batch_size': 12, 'training_p': 8}
Memory before del in train1Epoch: 
Memory allocated:  1790.8818359375
Memory cached:  5930.0
Memory after del in train1Epoch: 
Memory allocated:  1786.30419921875
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.8776, device='cuda:0', grad

[I 2023-10-27 05:09:43,213] Trial 31 finished with value: 0.19858601689338684 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 6, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 6, 'log_learning_rate': -2.2955261209470477, 'log_learning_rate_D': -2.1751920304843035, 'training_batch_size': 12, 'training_p': 8}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  2765.25146484375
Memory cached:  2808.0
Memory after del in train1Epoch: 
Memory allocated:  2764.10595703125
Memory cached:  2788.0
Time for this trial:  361.8428454399109
Memory status after this trial: 
Memory allocated:  2763.37744140625
Memory cached:  2788.0
--------------------  Trial  32   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 10, 'D_layers': 2, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'log_learning_rate': -1.0076513154366422, 'log_learning_rate_D': -2.993267360855632, 'training_batch_size': 11, 'training_p': 7}
Memory before del in train1Epoch: 
Memory allocated:  1795.6728515625
Memory cached:  13214.0
Memory after del in train1Epoch: 
Memory allocated:  1791.09521484375
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.9332, device='cuda:0', gra

[I 2023-10-27 05:17:15,567] Trial 32 finished with value: 73.87542724609375 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 10, 'D_layers': 2, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'log_learning_rate': -1.0076513154366422, 'log_learning_rate_D': -2.993267360855632, 'training_batch_size': 11, 'training_p': 7}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  4474.01123046875
Memory cached:  4554.0
--------------------  Trial  33   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 7, 'D_layers': 5, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 7, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 8, 'D_layer_units_exponent_4': 4, 'log_learning_rate': -1.7990824915472794, 'log_learning_rate_D': -3.4347923735848225, 'training_batch_size': 9, 'training_p': 8}
Memory before del in train1Epoch: 
Memory allocated:  1787.57080078125
Memory cached:  4554.0
Memory after del in train1Epoch: 
Memory allocated:  1785.92236328125
Memory cached:  1840.0
	 epoch  0 training error:  tensor(2.5197, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1787.578125
Memory cached:  4256.0
Memory after del in train1Epoch: 
Memory allocated:  1785.9296875
Memory cached:  1840.0

[I 2023-10-27 05:26:52,650] Trial 33 finished with value: 0.19962508976459503 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 7, 'D_layers': 5, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 7, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 8, 'D_layer_units_exponent_4': 4, 'log_learning_rate': -1.7990824915472794, 'log_learning_rate_D': -3.4347923735848225, 'training_batch_size': 9, 'training_p': 8}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  2738.78857421875
Memory cached:  2750.0
--------------------  Trial  34   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 10, 'D_layers': 4, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 6, 'log_learning_rate': -1.3351132075632517, 'log_learning_rate_D': -3.784813984527691, 'training_batch_size': 12, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1791.869140625
Memory cached:  8960.0
Memory after del in train1Epoch: 
Memory allocated:  1787.29150390625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(1.1778, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.9423828125
Memory cached:  8960.0
Memory after del in train1Epoch: 
Memory allocated:  1787.298828125
Memory cached:  1858.0
Memory before del in trai

[I 2023-10-27 05:32:52,055] Trial 34 finished with value: 0.20016707479953766 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 10, 'D_layers': 4, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 6, 'log_learning_rate': -1.3351132075632517, 'log_learning_rate_D': -3.784813984527691, 'training_batch_size': 12, 'training_p': 2}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  3471.89501953125
Memory cached:  3504.0
--------------------  Trial  35   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 5, 'log_learning_rate': -1.4216916064216283, 'log_learning_rate_D': -2.86615993788061, 'training_batch_size': 11, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1791.822265625
Memory cached:  7790.0
Memory after del in train1Epoch: 
Memory allocated:  1787.24462890625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.9611, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.8955078125
Memory cached:  7790.0
Memory after del in train1Epoch: 
Memory allocated:  1787.251953125
Memory cached:  18

[I 2023-10-27 05:39:20,545] Trial 35 finished with value: 0.19901344180107117 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 5, 'log_learning_rate': -1.4216916064216283, 'log_learning_rate_D': -2.86615993788061, 'training_batch_size': 11, 'training_p': 2}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  3202.48681640625
Memory cached:  3232.0
--------------------  Trial  36   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 5, 'log_learning_rate': -1.6398827849765842, 'log_learning_rate_D': -3.1801183481241586, 'training_batch_size': 10, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1791.373046875
Memory cached:  6610.0
Memory after del in train1Epoch: 
Memory allocated:  1786.79541015625
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.7877, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.4462890625
Memory cached:  6610.0
Memory after del in train1Epoch: 
Memory allocated:  1786.802734375
Memory cached:  

[I 2023-10-27 05:45:09,262] Trial 36 finished with value: 0.19820833206176758 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 7, 'D_layers': 4, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 5, 'log_learning_rate': -1.6398827849765842, 'log_learning_rate_D': -3.1801183481241586, 'training_batch_size': 10, 'training_p': 2}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  2923.66064453125
Memory cached:  2956.0
Memory after del in train1Epoch: 
Memory allocated:  2922.51513671875
Memory cached:  2934.0
Time for this trial:  348.2910475730896
Memory status after this trial: 
Memory allocated:  2921.52490234375
Memory cached:  2934.0
--------------------  Trial  37   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -1.6804496591734095, 'log_learning_rate_D': -3.074186830685537, 'training_batch_size': 10, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1791.0458984375
Memory cached:  5952.0
Memory after del in train1Epoch: 
Memory allocated:  1786.46826171875
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.2715, device='cuda:0', grad_f

[I 2023-10-27 05:51:13,231] Trial 37 finished with value: 0.19821739196777344 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -1.6804496591734095, 'log_learning_rate_D': -3.074186830685537, 'training_batch_size': 10, 'training_p': 3}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  2777.69580078125
Memory cached:  2808.0
Memory after del in train1Epoch: 
Memory allocated:  2776.55029296875
Memory cached:  2788.0
Time for this trial:  363.543434381485
Memory status after this trial: 
Memory allocated:  2775.73974609375
Memory cached:  2788.0
--------------------  Trial  38   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -1.6908395734252064, 'log_learning_rate_D': -3.1635988248633358, 'training_batch_size': 10, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1790.6220703125
Memory cached:  4748.0
Memory after del in train1Epoch: 
Memory allocated:  1786.04443359375
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.2116, device='cuda:0', grad_f

[I 2023-10-27 05:56:33,737] Trial 38 finished with value: 0.19954313337802887 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -1.6908395734252064, 'log_learning_rate_D': -3.1635988248633358, 'training_batch_size': 10, 'training_p': 3}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  2482.55908203125
Memory cached:  2492.0
--------------------  Trial  39   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 6, 'D_layers': 3, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 4, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -1.9687767697936769, 'log_learning_rate_D': -2.7733788048717414, 'training_batch_size': 10, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1791.11767578125
Memory cached:  9192.0
Memory after del in train1Epoch: 
Memory allocated:  1786.5400390625
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.7684, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.125
Memory cached:  9000.0
Memory after del in train1Epoch: 
Memory allocated:  1786.54736328125
Memory cached:  1858.0
Memory before del in train1E

[I 2023-10-27 06:02:02,354] Trial 39 finished with value: 0.1985296905040741 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 6, 'D_layers': 3, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 4, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -1.9687767697936769, 'log_learning_rate_D': -2.7733788048717414, 'training_batch_size': 10, 'training_p': 2}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  3472.81787109375
Memory cached:  3542.0
Memory after del in train1Epoch: 
Memory allocated:  3471.67236328125
Memory cached:  3520.0
Time for this trial:  328.209264755249
Memory status after this trial: 
Memory allocated:  3470.82275390625
Memory cached:  3520.0
--------------------  Trial  40   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'W_layer_units_exponent_3': 4, 'D_layers': 2, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.6936846763843816, 'log_learning_rate_D': -3.0931133244708113, 'training_batch_size': 9, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1787.00634765625
Memory cached:  4460.0
Memory after del in train1Epoch: 
Memory allocated:  1785.35791015625
Memory cached:  1840.0
	 epoch  0 training error:  tensor(2.4961, device='cuda:0', grad_f

[I 2023-10-27 06:12:30,070] Trial 40 finished with value: 0.19830705225467682 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'W_layer_units_exponent_3': 4, 'D_layers': 2, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.6936846763843816, 'log_learning_rate_D': -3.0931133244708113, 'training_batch_size': 9, 'training_p': 3}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  2820.36767578125
Memory cached:  2832.0
Memory after del in train1Epoch: 
Memory allocated:  2819.22216796875
Memory cached:  2830.0
Time for this trial:  627.2383685112
Memory status after this trial: 
Memory allocated:  2818.46240234375
Memory cached:  2830.0
--------------------  Trial  41   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -2.245846424860207, 'log_learning_rate_D': -4.98777601517031, 'training_batch_size': 10, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1790.45703125
Memory cached:  3866.0
Memory after del in train1Epoch: 
Memory allocated:  1785.87939453125
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.1563, device='cuda:0', grad_fn=<DivB

[I 2023-10-27 06:18:04,452] Trial 41 finished with value: 0.1987726241350174 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -2.245846424860207, 'log_learning_rate_D': -4.98777601517031, 'training_batch_size': 10, 'training_p': 3}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  2289.24853515625
Memory cached:  2318.0
Memory after del in train1Epoch: 
Memory allocated:  2288.10302734375
Memory cached:  2296.0
Time for this trial:  333.967143535614
Memory status after this trial: 
Memory allocated:  2287.57080078125
Memory cached:  2296.0
--------------------  Trial  42   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 9, 'W_layer_units_exponent_3': 9, 'D_layers': 2, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'log_learning_rate': -2.0346646218587536, 'log_learning_rate_D': -2.353629974357885, 'training_batch_size': 9, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1791.57666015625
Memory cached:  7718.0
Memory after del in train1Epoch: 
Memory allocated:  1789.92822265625
Memory cached:  1844.0
	 epoch  0 training error:  tensor(1.5637, device='cuda:0', grad_f

[I 2023-10-27 06:29:32,751] Trial 42 finished with value: 0.20103156566619873 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 9, 'W_layer_units_exponent_3': 9, 'D_layers': 2, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 6, 'log_learning_rate': -2.0346646218587536, 'log_learning_rate_D': -2.353629974357885, 'training_batch_size': 9, 'training_p': 2}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  4063.09716796875
Memory cached:  4130.0
Memory after del in train1Epoch: 
Memory allocated:  4061.95166015625
Memory cached:  4128.0
Time for this trial:  687.8595292568207
Memory status after this trial: 
Memory allocated:  4058.90673828125
Memory cached:  4128.0
--------------------  Trial  43   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 4, 'D_layer_units_exponent_1': 7, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -1.2331509807632388, 'log_learning_rate_D': -2.662289260348791, 'training_batch_size': 11, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1790.806640625
Memory cached:  4622.0
Memory after del in train1Epoch: 
Memory allocated:  1786.22900390625
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.0230, device='cuda:0', grad_fn

[I 2023-10-27 06:35:34,084] Trial 43 finished with value: 0.20645420253276825 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'D_layers': 3, 'D_layer_units_exponent_0': 4, 'D_layer_units_exponent_1': 7, 'D_layer_units_exponent_2': 4, 'log_learning_rate': -1.2331509807632388, 'log_learning_rate_D': -2.662289260348791, 'training_batch_size': 11, 'training_p': 2}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  2472.92333984375
Memory cached:  2506.0
Memory after del in train1Epoch: 
Memory allocated:  2471.77783203125
Memory cached:  2484.0
Time for this trial:  360.8971269130707
Memory status after this trial: 
Memory allocated:  2471.07080078125
Memory cached:  2484.0
--------------------  Trial  44   --------------------
Start timing: 
Parameters: 
{'W_layers': 5, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 4, 'W_layer_units_exponent_4': 10, 'D_layers': 5, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 4, 'D_layer_units_exponent_4': 5, 'log_learning_rate': -1.2760609481532237, 'log_learning_rate_D': -2.9956807223192756, 'training_batch_size': 10, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1790.763671875
Memory cached:  9692.0
Memory after del in train1Epoch: 


[I 2023-10-27 06:43:31,536] Trial 44 finished with value: 0.2051050364971161 and parameters: {'W_layers': 5, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 4, 'W_layer_units_exponent_4': 10, 'D_layers': 5, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 5, 'D_layer_units_exponent_3': 4, 'D_layer_units_exponent_4': 5, 'log_learning_rate': -1.2760609481532237, 'log_learning_rate_D': -2.9956807223192756, 'training_batch_size': 10, 'training_p': 3}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  3656.84033203125
Memory cached:  3692.0
Memory after del in train1Epoch: 
Memory allocated:  3655.69482421875
Memory cached:  3670.0
Time for this trial:  477.0081925392151
Memory status after this trial: 
Memory allocated:  3655.00927734375
Memory cached:  3670.0
--------------------  Trial  45   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'W_layer_units_exponent_3': 4, 'D_layers': 2, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.687597934158922, 'log_learning_rate_D': -3.1100811106845825, 'training_batch_size': 9, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1787.00634765625
Memory cached:  4460.0
Memory after del in train1Epoch: 
Memory allocated:  1785.35791015625
Memory cached:  1840.0
	 epoch  0 training error:  tensor(2.5000, device='cuda:0', grad_f

[I 2023-10-27 06:54:01,128] Trial 45 finished with value: 0.19840680062770844 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'W_layer_units_exponent_3': 4, 'D_layers': 2, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.687597934158922, 'log_learning_rate_D': -3.1100811106845825, 'training_batch_size': 9, 'training_p': 3}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  2820.36767578125
Memory cached:  2832.0
Memory after del in train1Epoch: 
Memory allocated:  2819.22216796875
Memory cached:  2830.0
Time for this trial:  629.1286296844482
Memory status after this trial: 
Memory allocated:  2818.46240234375
Memory cached:  2830.0
--------------------  Trial  46   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 6, 'W_layer_units_exponent_3': 6, 'D_layers': 2, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.6790112775110737, 'log_learning_rate_D': -3.3158900789885872, 'training_batch_size': 9, 'training_p': 4}
Memory before del in train1Epoch: 
Memory allocated:  1786.78759765625
Memory cached:  4352.0
Memory after del in train1Epoch: 
Memory allocated:  1785.13916015625
Memory cached:  1840.0
	 epoch  0 training error:  tensor(2.9955, device='cuda:0', grad_

[I 2023-10-27 07:04:27,928] Trial 46 finished with value: 0.19927898049354553 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 6, 'W_layer_units_exponent_3': 6, 'D_layers': 2, 'D_layer_units_exponent_0': 9, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.6790112775110737, 'log_learning_rate_D': -3.3158900789885872, 'training_batch_size': 9, 'training_p': 4}. Best is trial 21 with value: 0.1982005089521408.


Memory before del in train1Epoch: 
Memory allocated:  2777.42431640625
Memory cached:  2788.0
Memory after del in train1Epoch: 
Memory allocated:  2776.27880859375
Memory cached:  2786.0
Time for this trial:  626.3576984405518
Memory status after this trial: 
Memory allocated:  2775.62841796875
Memory cached:  2786.0
--------------------  Trial  47   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 5, 'D_layers': 2, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'log_learning_rate': -2.5005059601204183, 'log_learning_rate_D': -3.4934054740639002, 'training_batch_size': 9, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1787.45947265625
Memory cached:  5542.0
Memory after del in train1Epoch: 
Memory allocated:  1785.81103515625
Memory cached:  1840.0
	 epoch  0 training error:  tensor(0.8395, device='cuda:0', grad

[I 2023-10-27 07:14:45,710] Trial 47 finished with value: 0.19853445887565613 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 7, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 5, 'D_layers': 2, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'log_learning_rate': -2.5005059601204183, 'log_learning_rate_D': -3.4934054740639002, 'training_batch_size': 9, 'training_p': 3}. Best is trial 21 with value: 0.1982005089521408.


Memory status after this trial: 
Memory allocated:  3239.02587890625
Memory cached:  3252.0
--------------------  Trial  48   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.2947140746322157, 'log_learning_rate_D': -3.0617397064599357, 'training_batch_size': 8, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1785.64208984375
Memory cached:  3322.0
Memory after del in train1Epoch: 
Memory allocated:  1785.45751953125
Memory cached:  1842.0
	 epoch  0 training error:  tensor(17.1739, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1785.6494140625
Memory cached:  3322.0
Memory after del in train1Epoch: 
Memory allocated:  1785.46484375
Memory cached:  1842.0
Memory before del in train1Epoch: 
Memory allocated:  1

[W 2023-10-27 07:15:27,694] Trial 48 failed with parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.2947140746322157, 'log_learning_rate_D': -3.0617397064599357, 'training_batch_size': 8, 'training_p': 3} because of the following error: The value nan is not acceptable.
[W 2023-10-27 07:15:27,696] Trial 48 failed with value tensor(nan, device='cuda:0', grad_fn=<DivBackward0>).


Memory status after this trial: 
Memory allocated:  2971.73486328125
Memory cached:  2988.0
--------------------  Trial  49   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.312169099077092, 'log_learning_rate_D': -3.1108527773831502, 'training_batch_size': 8, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1785.64208984375
Memory cached:  3322.0
Memory after del in train1Epoch: 
Memory allocated:  1785.45751953125
Memory cached:  1842.0
	 epoch  0 training error:  tensor(27.6409, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1785.6494140625
Memory cached:  3322.0
Memory after del in train1Epoch: 
Memory allocated:  1785.46484375
Memory cached:  1842.0
Memory before del in train1Epoch: 
Memory allocated:  17

[I 2023-10-27 07:32:14,316] Trial 49 finished with value: 0.19928129017353058 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 5, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -1.312169099077092, 'log_learning_rate_D': -3.1108527773831502, 'training_batch_size': 8, 'training_p': 3}. Best is trial 21 with value: 0.1982005089521408.
[I 2023-10-27 07:32:14,366] A new study created in memory with name: no-name-adf556da-9567-4eb6-afa1-e5b437cf607d


--------------------  Trial  0   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 6, 'D_layers': 2, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 8, 'log_learning_rate': -2.119485737617533, 'log_learning_rate_D': -3.9014640449883786, 'training_batch_size': 10, 'training_p': 2}
Memory before del in train1Epoch: 
Memory allocated:  1791.326171875
Memory cached:  4558.0
Memory after del in train1Epoch: 
Memory allocated:  1786.74853515625
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.2267, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1791.33935546875
Memory cached:  4558.0
Memory after del in train1Epoch: 
Memory allocated:  1786.76171875
Memory cached:  1856.0
Memory before del in train1Epoch: 
Memory allocated:  1791.33935546875
Memory cached:  4558.0
Memory after del in train1Epoch: 
Memory allocated:  1786.76171875
Memory cached:  1856

[I 2023-10-27 07:37:34,836] Trial 0 finished with value: 0.19934989511966705 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 6, 'D_layers': 2, 'D_layer_units_exponent_0': 8, 'D_layer_units_exponent_1': 8, 'log_learning_rate': -2.119485737617533, 'log_learning_rate_D': -3.9014640449883786, 'training_batch_size': 10, 'training_p': 2}. Best is trial 0 with value: 0.19934989511966705.


Memory before del in train1Epoch: 
Memory allocated:  2449.22412109375
Memory cached:  2500.0
Memory after del in train1Epoch: 
Memory allocated:  2448.07861328125
Memory cached:  2478.0
Time for this trial:  320.184401512146
Memory status after this trial: 
Memory allocated:  2447.11474609375
Memory cached:  2478.0
--------------------  Trial  1   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 10, 'D_layers': 3, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 8, 'log_learning_rate': -2.0330725361438646, 'log_learning_rate_D': -1.9224963916844784, 'training_batch_size': 8, 'training_p': 5}
Memory before del in train1Epoch: 
Memory allocated:  1793.16162109375
Memory cached:  5672.0
Memory after del in train1Epoch: 
Memory allocated:  1792.97705078125
Memory cached:  1858.0
	 epoch  0 training error:  tensor(6.8880, device='cuda:0', grad_

[I 2023-10-27 07:56:41,137] Trial 1 finished with value: 0.2039358913898468 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 4, 'W_layer_units_exponent_1': 9, 'W_layer_units_exponent_2': 10, 'D_layers': 3, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 8, 'log_learning_rate': -2.0330725361438646, 'log_learning_rate_D': -1.9224963916844784, 'training_batch_size': 8, 'training_p': 5}. Best is trial 0 with value: 0.19934989511966705.


Memory status after this trial: 
Memory allocated:  4769.07666015625
Memory cached:  4844.0
--------------------  Trial  2   --------------------
Start timing: 
Parameters: 
{'W_layers': 5, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'W_layer_units_exponent_3': 4, 'W_layer_units_exponent_4': 8, 'D_layers': 5, 'D_layer_units_exponent_0': 6, 'D_layer_units_exponent_1': 10, 'D_layer_units_exponent_2': 6, 'D_layer_units_exponent_3': 7, 'D_layer_units_exponent_4': 10, 'log_learning_rate': -4.779773625216466, 'log_learning_rate_D': -3.9338202310108814, 'training_batch_size': 12, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1795.2265625
Memory cached:  13382.0
Memory after del in train1Epoch: 
Memory allocated:  1790.64892578125
Memory cached:  1856.0
	 epoch  0 training error:  tensor(1.0264, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1795.3056640625
Memory cached:  

[I 2023-10-27 08:06:13,943] Trial 2 finished with value: 0.5890437960624695 and parameters: {'W_layers': 5, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 7, 'W_layer_units_exponent_3': 4, 'W_layer_units_exponent_4': 8, 'D_layers': 5, 'D_layer_units_exponent_0': 6, 'D_layer_units_exponent_1': 10, 'D_layer_units_exponent_2': 6, 'D_layer_units_exponent_3': 7, 'D_layer_units_exponent_4': 10, 'log_learning_rate': -4.779773625216466, 'log_learning_rate_D': -3.9338202310108814, 'training_batch_size': 12, 'training_p': 3}. Best is trial 0 with value: 0.19934989511966705.


Memory before del in train1Epoch: 
Memory allocated:  4549.99951171875
Memory cached:  4622.0
Memory after del in train1Epoch: 
Memory allocated:  4548.85400390625
Memory cached:  4602.0
Time for this trial:  572.44588804245
Memory status after this trial: 
Memory allocated:  4545.93994140625
Memory cached:  4602.0
--------------------  Trial  3   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 4, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 5, 'D_layers': 2, 'D_layer_units_exponent_0': 6, 'D_layer_units_exponent_1': 5, 'log_learning_rate': -4.614191965232388, 'log_learning_rate_D': -2.3236406508191303, 'training_batch_size': 11, 'training_p': 6}
Memory before del in train1Epoch: 
Memory allocated:  1790.44140625
Memory cached:  4772.0
Memory after del in train1Epoch: 
Memory allocated:  1785.86376953125
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.9015, device='cuda:0', grad_fn=<Di

[I 2023-10-27 08:12:33,276] Trial 3 finished with value: 0.20223453640937805 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 7, 'W_layer_units_exponent_1': 4, 'W_layer_units_exponent_2': 8, 'W_layer_units_exponent_3': 5, 'D_layers': 2, 'D_layer_units_exponent_0': 6, 'D_layer_units_exponent_1': 5, 'log_learning_rate': -4.614191965232388, 'log_learning_rate_D': -2.3236406508191303, 'training_batch_size': 11, 'training_p': 6}. Best is trial 0 with value: 0.19934989511966705.


Memory status after this trial: 
Memory allocated:  2508.34228515625
Memory cached:  2518.0
--------------------  Trial  4   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 8, 'D_layers': 2, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -2.5697602872133247, 'log_learning_rate_D': -3.533121646826288, 'training_batch_size': 10, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1792.234375
Memory cached:  7402.0
Memory after del in train1Epoch: 
Memory allocated:  1787.65673828125
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.9617, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1792.24755859375
Memory cached:  7404.0
Memory after del in train1Epoch: 
Memory allocated:  1787.669921875
Memory cached:  1856.0
Memory before del in train1Epoch: 
Memory allocated:  1792.2

[I 2023-10-27 08:19:54,741] Trial 4 finished with value: 0.1990862637758255 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 8, 'D_layers': 2, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 4, 'log_learning_rate': -2.5697602872133247, 'log_learning_rate_D': -3.533121646826288, 'training_batch_size': 10, 'training_p': 3}. Best is trial 4 with value: 0.1990862637758255.


Memory before del in train1Epoch: 
Memory allocated:  2975.59619140625
Memory cached:  3058.0
Memory after del in train1Epoch: 
Memory allocated:  2974.45068359375
Memory cached:  3036.0
Time for this trial:  441.13280868530273
Memory status after this trial: 
Memory allocated:  2973.03271484375
Memory cached:  3036.0
--------------------  Trial  5   --------------------
Start timing: 
Parameters: 
{'W_layers': 4, 'W_layer_units_exponent_0': 9, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 9, 'W_layer_units_exponent_3': 10, 'D_layers': 5, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 8, 'D_layer_units_exponent_3': 9, 'D_layer_units_exponent_4': 5, 'log_learning_rate': -4.425745224186867, 'log_learning_rate_D': -3.953491952406299, 'training_batch_size': 9, 'training_p': 4}
Memory before del in train1Epoch: 
Memory allocated:  1802.64892578125
Memory cached:  13170.0
Memory after del in train1Epoch: 
Memory allocated:  1801.000488

[I 2023-10-27 08:34:45,787] Trial 5 finished with value: 0.1983865648508072 and parameters: {'W_layers': 4, 'W_layer_units_exponent_0': 9, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 9, 'W_layer_units_exponent_3': 10, 'D_layers': 5, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 8, 'D_layer_units_exponent_3': 9, 'D_layer_units_exponent_4': 5, 'log_learning_rate': -4.425745224186867, 'log_learning_rate_D': -3.953491952406299, 'training_batch_size': 9, 'training_p': 4}. Best is trial 5 with value: 0.1983865648508072.


Memory before del in train1Epoch: 
Memory allocated:  6221.82568359375
Memory cached:  6326.0
Memory after del in train1Epoch: 
Memory allocated:  6220.68017578125
Memory cached:  6326.0
Time for this trial:  890.675518989563
Memory status after this trial: 
Memory allocated:  6212.10205078125
Memory cached:  6324.0
--------------------  Trial  6   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 6, 'D_layers': 3, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 6, 'log_learning_rate': -1.7304971426036273, 'log_learning_rate_D': -4.353074288018118, 'training_batch_size': 6, 'training_p': 3}
Memory before del in train1Epoch: 
Memory allocated:  1785.578125
Memory cached:  6324.0
Memory after del in train1Epoch: 
Memory allocated:  1785.38916015625
Memory cached:  1838.0
	 epoch  0 training error:  tensor(5.9608, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del i

[I 2023-10-27 09:22:03,456] Trial 6 finished with value: 0.20198531448841095 and parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 6, 'D_layers': 3, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 6, 'D_layer_units_exponent_2': 6, 'log_learning_rate': -1.7304971426036273, 'log_learning_rate_D': -4.353074288018118, 'training_batch_size': 6, 'training_p': 3}. Best is trial 5 with value: 0.1983865648508072.


--------------------  Trial  7   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 5, 'D_layers': 3, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 6, 'log_learning_rate': -1.385564581192832, 'log_learning_rate_D': -3.848975189575293, 'training_batch_size': 11, 'training_p': 6}
Memory before del in train1Epoch: 
Memory allocated:  1794.9453125
Memory cached:  13920.0
Memory after del in train1Epoch: 
Memory allocated:  1790.36767578125
Memory cached:  1858.0
	 epoch  0 training error:  tensor(0.8429, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1795.0244140625
Memory cached:  13924.0
Memory after del in train1Epoch: 
Memory allocated:  1790.380859375
Memory cached:  1858.0
Memory before del in train1Epoch: 
Memory allocated:  1795.0244140625
Memory cached:  13924.0
Memory after del in trai

[W 2023-10-27 09:23:57,097] Trial 7 failed with parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 8, 'W_layer_units_exponent_2': 5, 'D_layers': 3, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 5, 'D_layer_units_exponent_2': 6, 'log_learning_rate': -1.385564581192832, 'log_learning_rate_D': -3.848975189575293, 'training_batch_size': 11, 'training_p': 6} because of the following error: The value nan is not acceptable.
[W 2023-10-27 09:23:57,100] Trial 7 failed with value tensor(nan, device='cuda:0', grad_fn=<DivBackward0>).


Memory status after this trial: 
Memory allocated:  4633.05517578125
Memory cached:  4814.0
--------------------  Trial  8   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 5, 'D_layer_units_exponent_1': 6, 'log_learning_rate': -4.9952265952903625, 'log_learning_rate_D': -3.737079534480144, 'training_batch_size': 12, 'training_p': 7}
Memory before del in train1Epoch: 
Memory allocated:  1790.310546875
Memory cached:  4834.0
Memory after del in train1Epoch: 
Memory allocated:  1785.73291015625
Memory cached:  1856.0
	 epoch  0 training error:  tensor(0.8312, device='cuda:0', grad_fn=<DivBackward0>)
Memory before del in train1Epoch: 
Memory allocated:  1790.32373046875
Memory cached:  3346.0
Memory after del in train1Epoch: 
Memory allocated:  1785.74609375
Memory cached:  1856.0
Memory before del in train1Epoch: 
Memory allocated:  1790

[I 2023-10-27 09:29:09,102] Trial 8 finished with value: 0.6671671271324158 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 5, 'W_layer_units_exponent_2': 7, 'D_layers': 2, 'D_layer_units_exponent_0': 5, 'D_layer_units_exponent_1': 6, 'log_learning_rate': -4.9952265952903625, 'log_learning_rate_D': -3.737079534480144, 'training_batch_size': 12, 'training_p': 7}. Best is trial 5 with value: 0.1983865648508072.


Memory before del in train1Epoch: 
Memory allocated:  2155.87451171875
Memory cached:  2194.0
Memory after del in train1Epoch: 
Memory allocated:  2154.72900390625
Memory cached:  2172.0
Time for this trial:  311.61829113960266
Memory status after this trial: 
Memory allocated:  2154.27294921875
Memory cached:  2172.0
--------------------  Trial  9   --------------------
Start timing: 
Parameters: 
{'W_layers': 3, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 4, 'W_layer_units_exponent_2': 10, 'D_layers': 2, 'D_layer_units_exponent_0': 4, 'D_layer_units_exponent_1': 6, 'log_learning_rate': -4.193569945077803, 'log_learning_rate_D': -3.7606333138637114, 'training_batch_size': 6, 'training_p': 5}
Memory before del in train1Epoch: 
Memory allocated:  1784.931640625
Memory cached:  2696.0
Memory after del in train1Epoch: 
Memory allocated:  1784.74267578125
Memory cached:  1836.0
	 epoch  0 training error:  tensor(0.5834, device='cuda:0', grad_fn=<DivBackward0>)
Memory before

[I 2023-10-27 10:21:17,017] Trial 9 finished with value: 0.20227700471878052 and parameters: {'W_layers': 3, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 4, 'W_layer_units_exponent_2': 10, 'D_layers': 2, 'D_layer_units_exponent_0': 4, 'D_layer_units_exponent_1': 6, 'log_learning_rate': -4.193569945077803, 'log_learning_rate_D': -3.7606333138637114, 'training_batch_size': 6, 'training_p': 5}. Best is trial 5 with value: 0.1983865648508072.


Memory status after this trial: 
Memory allocated:  4608.14794921875
Memory cached:  5312.0
--------------------  Trial  10   --------------------
Start timing: 
Parameters: 
{'W_layers': 2, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 10, 'D_layers': 5, 'D_layer_units_exponent_0': 5, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 6, 'D_layer_units_exponent_4': 4, 'log_learning_rate': -3.7181322845738674, 'log_learning_rate_D': -3.8596771414035835, 'training_batch_size': 11, 'training_p': 6}


[W 2023-10-27 10:21:20,006] Trial 10 failed with parameters: {'W_layers': 2, 'W_layer_units_exponent_0': 10, 'W_layer_units_exponent_1': 10, 'D_layers': 5, 'D_layer_units_exponent_0': 5, 'D_layer_units_exponent_1': 8, 'D_layer_units_exponent_2': 4, 'D_layer_units_exponent_3': 6, 'D_layer_units_exponent_4': 4, 'log_learning_rate': -3.7181322845738674, 'log_learning_rate_D': -3.8596771414035835, 'training_batch_size': 11, 'training_p': 6} because of the following error: RuntimeError('CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.73 GiB total capacity; 13.47 GiB already allocated; 64.12 MiB free; 14.20 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF').
Traceback (most recent call last):
  File "/home/shengduo/anaconda3/lib/python3.9/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(tri

RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.73 GiB total capacity; 13.47 GiB already allocated; 64.12 MiB free; 14.20 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [1]:
dim_xis = [1, 2, 4]
for idx, dim_xi in enumerate(dim_xis):
    print("-" * 20, " dim_xi = ", dim_xi, " ", "-" * 20)
    shit = joblib.load('./data/study_dim_xi_Dexp_{0}.pkl'.format(dim_xi))
    print("best_params: ", shit.best_params)
    print("best_value: ", shit.best_value)

--------------------  dim_xi =  1   --------------------


NameError: name 'joblib' is not defined

# Demo: Try taking gradients w.r.t. inputs, this works

In [38]:
memory_stats()

13429.31591796875
14524.0




In [39]:
del this_study

13429.31591796875
14524.0


In [None]:
OptKwgs['dim_xi'] = 8
myOpt = OptunaObj(OptKwgs)
this_study = optuna.create_study(direction='minimize')
this_study.optimize(myOpt.objective, n_trials=50)
studys.append(this_study)

[I 2023-10-20 10:32:48,058] A new study created in memory with name: no-name-1b7e167f-d716-46cc-b919-41be5be1dacf


--------------------  Trial  0   --------------------
Parameters: 
{'W_layers': 7, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 7, 'W_layer_units_exponent_3': 10, 'W_layer_units_exponent_4': 6, 'W_layer_units_exponent_5': 4, 'W_layer_units_exponent_6': 4, 'D_layers': 2, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 5, 'log_learning_rate': -2.1740270893419456, 'log_learning_rate_D': -1.8049883560485425, 'training_batch_size': 11, 'training_p': 2}
DXi:  tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0', requires_grad=True)
DXi:  tensor([[ 0.1296,  0.1186, -0.0344,  ..., -0.0241, -0.0290, -0.0047],
        [ 0.1296,  0.1186, -0.0344,  ..., -0.0241, -0.0290, -0.0047],
        [ 0.1296,  0.1186, -0.0344,  ..., -0.0241

[W 2023-10-20 10:33:10,813] Trial 0 failed with parameters: {'W_layers': 7, 'W_layer_units_exponent_0': 6, 'W_layer_units_exponent_1': 6, 'W_layer_units_exponent_2': 7, 'W_layer_units_exponent_3': 10, 'W_layer_units_exponent_4': 6, 'W_layer_units_exponent_5': 4, 'W_layer_units_exponent_6': 4, 'D_layers': 2, 'D_layer_units_exponent_0': 7, 'D_layer_units_exponent_1': 5, 'log_learning_rate': -2.1740270893419456, 'log_learning_rate_D': -1.8049883560485425, 'training_batch_size': 11, 'training_p': 2} because of the following error: The value nan is not acceptable.
[W 2023-10-20 10:33:10,815] Trial 0 failed with value tensor(nan, device='cuda:0', grad_fn=<DivBackward0>).


--------------------  Trial  1   --------------------
Parameters: 
{'W_layers': 8, 'W_layer_units_exponent_0': 8, 'W_layer_units_exponent_1': 10, 'W_layer_units_exponent_2': 4, 'W_layer_units_exponent_3': 7, 'W_layer_units_exponent_4': 7, 'W_layer_units_exponent_5': 10, 'W_layer_units_exponent_6': 6, 'W_layer_units_exponent_7': 7, 'D_layers': 5, 'D_layer_units_exponent_0': 10, 'D_layer_units_exponent_1': 7, 'D_layer_units_exponent_2': 7, 'D_layer_units_exponent_3': 10, 'D_layer_units_exponent_4': 8, 'log_learning_rate': -3.297885589978985, 'log_learning_rate_D': -4.090366640652964, 'training_batch_size': 9, 'training_p': 6}
DXi:  tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0', requires_grad=True)
DXi:  tensor([[ 0.0571, -0.0092,  0.0313,  ...,  0.0476,  0.0631, -0.03

In [1]:
b = torch.tensor([[2],[1]])
b

NameError: name 'torch' is not defined

In [399]:
a / b

tensor([[-0.0381,  0.3545, -0.9407, -0.6392, -0.7341],
        [ 0.4466, -1.4642,  0.0669,  1.8598, -0.8693]])

In [3]:
# Construct my NN with initialized default parameters 
NNs = [16, 16]
input_dim = 4
output_dim = 1

myPP = PP(NNs, input_dim, output_dim)

In [23]:
# Try taking gradients w.r.t. the inputs
x = torch.tensor([[1., 2., -1., 3.]], requires_grad=True)
y = torch.sum(myPP(x))
dydx = torch.autograd.grad(outputs=y, inputs=x, retain_graph=True)[0]

# Show values and gradients
print("y: ", y)
print("dydx: ", dydx)

y:  tensor(-0.1882, grad_fn=<SumBackward0>)
dydx:  tensor([[ 0.0650, -0.0497, -0.0758, -0.0635]])


In [22]:
# Try taking gradients w.r.t. the inputs
x = torch.tensor([[1., 2., -1., 3.], [-1., -3., 2., 5.]], requires_grad=True)
y = torch.sum(myPP(x))
dydx = torch.autograd.grad(outputs=y, inputs=x, retain_graph=True)[0]

# Show values and gradients
print("y: ", y)
print("dydx: ", dydx)

y:  tensor(-0.3059, grad_fn=<SumBackward0>)
dydx:  tensor([[ 0.0650, -0.0497, -0.0758, -0.0635],
        [ 0.0623, -0.0945, -0.0193, -0.0369]])


In [20]:
dydx[0]

tensor([[ 0.0650, -0.0497, -0.0758, -0.0635],
        [ 0.0623, -0.0945, -0.0193, -0.0369]])

In [21]:
y

tensor(-0.3059, grad_fn=<SumBackward0>)