In [1]:
import pandas as pd
from tqdm.notebook import tqdm
import torch
import pickle
import time
from torch.utils.data import DataLoader
import optuna
from transformers import BertTokenizer, VisualBertForPreTraining
import numpy as np

In [2]:
# Root folder of the local dataset copy (machine-specific absolute path;
# the trailing separator lets file names be appended directly).
data_dir = r'E:\datasets\MADE\3_graduation\parthplc\archive\data\\'

# Annotation files in JSON Lines format (one record per line).
train_path = f'{data_dir}train.jsonl'
dev_path = f'{data_dir}dev.jsonl'

train_data = pd.read_json(train_path, lines=True)
# The dev split is loaded under the name `test_data`; later cells use it for validation.
test_data = pd.read_json(dev_path, lines=True)

test_data.head(3)

Unnamed: 0,id,img,label,text
0,8291,img/08291.png,1,white people is this a shooting range
1,46971,img/46971.png,1,bravery at its finest
2,3745,img/03745.png,1,your order comes to $37.50 and your white priv...


In [3]:
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open feature files that were produced locally / come from a trusted source.
with open('d:\\visual_embeddings_val.pkl', 'rb') as f:
    visual_embeddings_val = pickle.load(f)

# Keep only the dev rows that have an entry in the visual-embedding store.
# Columns are addressed by name (the frame preview shows id, img, label, text)
# instead of by position, so the mapping no longer depends on column order.
# The image path is both the dictionary key and the value stored under 'id'.
val_dict = {
    img: {'label': label, 'text': text, 'id': img}
    for img, label, text in zip(test_data['img'], test_data['label'], test_data['text'])
    if img in visual_embeddings_val
}


In [4]:
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open feature files that were produced locally / come from a trusted source.
with open('d:\\visual_embeddings_train.pkl', 'rb') as f:
    visual_embeddings_train = pickle.load(f)

# Keep only the training rows that have an entry in the visual-embedding store.
# Columns are addressed by name (img, label, text) instead of by position,
# so the mapping no longer depends on column order.
# The image path is both the dictionary key and the value stored under 'id'.
train_dict = {
    img: {'label': label, 'text': text, 'id': img}
    for img, label, text in zip(train_data['img'], train_data['label'], train_data['text'])
    if img in visual_embeddings_train
}


In [5]:
class FeaturesDataset(torch.utils.data.Dataset):
    """
    Dataset that joins precomputed visual embeddings with text and labels.

    Arguments:
        visual_embeddings: mapping from sample key to a sequence; element [0]
            of that sequence is what __getitem__ returns as the visual features
        labels: mapping from sample key to a dict with 'label' and 'text' entries;
            its iteration order defines the index -> key order of the dataset
        max_text_len: number of leading characters of the text that are kept
            (77 by default, the value that used to be hard-coded)
    """
    def __init__(self, visual_embeddings, labels, max_text_len=77):
        self.visual_embeddings = visual_embeddings
        self.labels = labels
        self.max_text_len = max_text_len

        # Positional index -> record; gives the mapping a stable integer order.
        self.idx2id = [{'id': key, 'label': labels[key]['label'], 'text': labels[key]['text']}
                       for key in labels]


    def __getitem__(self, index: int):
        """
        Returns: (key, visual features, text cut to max_text_len characters, label)
        """
        sample_id = self.idx2id[index]['id']
        entry = self.labels[sample_id]
        return (sample_id,
                self.visual_embeddings[sample_id][0],
                entry['text'][:self.max_text_len],
                entry['label'])


    def __len__(self):
        """Returns the number of samples, i.e. the number of keys in `labels`."""
        return len(self.idx2id)

In [6]:
def get_lr(optimizer):
    """
    Returns the 'lr' value of the optimizer's first parameter group,
    or None when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']


def train_model(model, train_loader, val_loader, loss, optimizer, num_epochs, scheduler, device):
    """
    Trains the model for up to num_epochs epochs and evaluates it on val_loader
    after every epoch.

    Each batch from the loaders is unpacked as (id, visual_embeds, text, y).
    The text is tokenized with the module-level `tokenizer` (it is not a
    parameter, so it must be defined before this function is called), and the
    model's `prediction_logits` are summed over axis 1 before being passed to
    `loss` (presumably the sequence axis, leaving one row of class scores per
    sample -- TODO confirm against the model output shape).

    Side effects:
        - whenever validation accuracy exceeds the best value seen so far
          (starting from a floor of 0.64) the whole model object is written
          with torch.save to 'classifier_{epoch}_{accuracy}.ckpt' in the
          current working directory;
        - one progress line is printed per epoch.

    Training stops early ('pruned') once at least five epochs are recorded and
    each of the last four epoch-to-epoch validation accuracy changes is
    below 0.001.

    Arguments:
        scheduler: learning-rate scheduler stepped once per epoch, or None

    Returns: (loss_history, train_history, val_history, best_model_name);
        best_model_name is None when no checkpoint was written
    """
    t1 = time.time()
    best_model_name = None
    loss_history = []
    train_history = []
    val_history = []
    # Checkpoints are only written for validation accuracies above this floor.
    top_val_accuracy = 0.64
    for epoch in range(num_epochs):
        model.train()
        loss_accum = 0
        correct_samples = 0
        total_samples = 0
        num_batches = 0
        for sample_id, visual_embeds, text, y in train_loader:
            y = y.to(device)
            visual_embeds = visual_embeds.to(device)
            # truncation=True guarantees every row has exactly max_length tokens;
            # padding alone leaves longer texts ragged and torch.tensor() would fail.
            tokens = tokenizer(list(text), padding='max_length', truncation=True, max_length=77)

            input_ids = torch.tensor(tokens["input_ids"], device=device)
            attention_mask = torch.tensor(tokens["attention_mask"], device=device)
            token_type_ids = torch.tensor(tokens["token_type_ids"], device=device)

            # All visual positions are attended to and share token type 1.
            visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)
            visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)

            outputs = model(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids,
                            visual_embeds=visual_embeds,
                            visual_attention_mask=visual_attention_mask,
                            visual_token_type_ids=visual_token_type_ids)

            prediction = outputs.prediction_logits.sum(axis=1)

            loss_value = loss(prediction, y)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()

            _, indices = torch.max(prediction, 1)
            correct_samples += torch.sum(indices == y)
            total_samples += y.shape[0]

            # detach() keeps only the value; adding the attached tensor would
            # chain every batch's autograd history into loss_accum for the whole epoch.
            loss_accum += loss_value.detach()
            num_batches += 1

        ave_loss = float(loss_accum) / num_batches
        train_accuracy = float(correct_samples) / total_samples
        val_accuracy = compute_accuracy(model, val_loader, device)

        loss_history.append(ave_loss)
        train_history.append(train_accuracy)
        val_history.append(val_accuracy)
        if scheduler is not None:
            scheduler.step()

        # The scheduler has already been stepped, so the reported lr is the one
        # the next epoch will use.
        print("Epoch: %i; %.2f sec; lr: %f; Average loss: %.2f, Train accuracy: %.4f, Val accuracy: %.4f" % 
              (epoch, round(time.time() - t1, 2), get_lr(optimizer), ave_loss, train_accuracy, val_accuracy))

        if val_accuracy > top_val_accuracy:
            top_val_accuracy = val_accuracy
            model_name = f'classifier_{epoch}_{round(val_accuracy, 3)}.ckpt'
            best_model_name = model_name
            # Context manager closes the checkpoint file even if saving fails.
            with open(model_name, 'wb') as checkpoint_file:
                torch.save(model, checkpoint_file)
            print("saved", model_name)

        if len(val_history) > 4:
            # stalled[0] is the most recent epoch-to-epoch change, stalled[3] the oldest.
            stalled = [(val_history[-k] - val_history[-k - 1]) < 0.001 for k in range(1, 5)]
            # The 13-space gap reproduces the existing log layout exactly.
            gap = ' ' * 13
            print(f'{stalled[0]} {stalled[1]}{gap}{stalled[2]} {stalled[3]}')

            if all(stalled):
                print('pruned')
                return loss_history, train_history, val_history, best_model_name

    return loss_history, train_history, val_history, best_model_name
        
    
def compute_accuracy(model, loader, device):
    """
    Computes accuracy on the dataset wrapped in a loader

    Each batch is unpacked as (id, visual_embeds, text, y); the text is
    tokenized with the module-level `tokenizer`. The model is switched to
    eval mode and is left in that mode when the function returns.

    Returns: accuracy as a float value between 0 and 1
    """
    model.eval()
    correct_samples = 0
    total_samples = 0
    # Nothing is back-propagated here, so autograd bookkeeping is disabled;
    # otherwise every forward pass keeps its activations alive on the device.
    with torch.no_grad():
        for sample_id, visual_embeds, text, y in loader:
            y = y.to(device)
            visual_embeds = visual_embeds.to(device)
            # truncation=True guarantees every row has exactly max_length tokens;
            # padding alone leaves longer texts ragged and torch.tensor() would fail.
            tokens = tokenizer(list(text), padding='max_length', truncation=True, max_length=77)

            input_ids = torch.tensor(tokens["input_ids"], device=device)
            attention_mask = torch.tensor(tokens["attention_mask"], device=device)
            token_type_ids = torch.tensor(tokens["token_type_ids"], device=device)

            # All visual positions are attended to and share token type 1.
            visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)
            visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)

            outputs = model(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids,
                            visual_embeds=visual_embeds,
                            visual_attention_mask=visual_attention_mask,
                            visual_token_type_ids=visual_token_type_ids)

            prediction = outputs.prediction_logits.sum(axis=1)

            _, indices = torch.max(prediction, 1)
            correct_samples += torch.sum(indices == y)
            total_samples += y.shape[0]

    val_accuracy = float(correct_samples) / total_samples

    return val_accuracy

In [7]:
# Module-level tokenizer: train_model and compute_accuracy read this global
# directly, so this cell must run before either of them is called.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [8]:
# Pair each embedding store with its filtered label dictionary; the validation
# dataset is built from the dev split.
features_train_dataset = FeaturesDataset(visual_embeddings_train, train_dict)
features_val_dataset = FeaturesDataset(visual_embeddings_val, val_dict)

In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [10]:
# Loss function handed to train_model, where it is applied to the summed logits
# and the batch labels.
loss = torch.nn.CrossEntropyLoss()

In [11]:

# Disabled (`if False:`) single run with fixed hyper-parameters and 2 epochs;
# it mirrors the setup that objective() builds for every Optuna trial.
if False:
    layer_count=189

    step_size=4
    batch_size=400
    learning_rate=0.0036137
    gamma=0.63426

    model = VisualBertForPreTraining.from_pretrained('uclanlp/visualbert-nlvr2-coco-pre') # this checkpoint has 1024 dimensional visual embeddings projection
    # Freeze the leading parameters. The break test runs after the flag is
    # cleared, so indices 0 .. layer_count + 1 (inclusive) end up frozen.
    for i, param in enumerate(model.parameters()):
        param.requires_grad = False
        if i > layer_count:
            break

    # Replace the output layer with a 2-class head. It is created after the
    # freeze loop, so its parameters keep requires_grad=True.
    model.cls.predictions.decoder = torch.nn.Linear(in_features=768, out_features=2, bias=True)
    model = model.to(device)


    # Only parameters that are still trainable are handed to the optimizer.
    params = []
    for name, param in model.named_parameters():
        if param.requires_grad == True:
            params.append(param)        

    # Print index and name of every trainable parameter for inspection.
    for i, (name, param) in enumerate(model.named_parameters()):
        if param.requires_grad == True:
            print(i, name)

    optimizer = torch.optim.Adam(params, lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    loss_history, train_history, val_history, best_model_name = train_model(
            model, 
            DataLoader(features_train_dataset, batch_size=batch_size),
            DataLoader(features_val_dataset, batch_size=500),
            loss, optimizer, 2, scheduler, device)


# Reference list of parameter indices and names -- presumably copied from the
# print loop above in an earlier run; verify before relying on the numbering.
#189 visual_bert.encoder.layer.11.attention.self.value.weight
#190 visual_bert.encoder.layer.11.attention.self.value.bias
#191 visual_bert.encoder.layer.11.attention.output.dense.weight
#192 visual_bert.encoder.layer.11.attention.output.dense.bias
#193 visual_bert.encoder.layer.11.attention.output.LayerNorm.weight
#194 visual_bert.encoder.layer.11.attention.output.LayerNorm.bias
#195 visual_bert.encoder.layer.11.intermediate.dense.weight
#196 visual_bert.encoder.layer.11.intermediate.dense.bias
#197 visual_bert.encoder.layer.11.output.dense.weight
#198 visual_bert.encoder.layer.11.output.dense.bias

In [12]:
#assert False

In [13]:
# Upper bound on epochs per Optuna trial; objective() also divides the number of
# completed epochs by this value when scoring a trial.
epoch_count = 30

In [None]:
def objective(trial):
    """
    Optuna objective: fine-tunes a fresh VisualBERT with hyper-parameters
    sampled from `trial` and returns a score to maximize.

    Reads the module-level `device`, `loss`, `epoch_count`,
    `features_train_dataset` and `features_val_dataset`, and calls train_model.

    Returns:
        0 when train_model raises (the exception is printed, not re-raised);
        otherwise the average of four terms: mean, maximum and final validation
        accuracy, plus len(val_history) / epoch_count / 2.
    """
    layer_count = trial.suggest_int("layer_count", 189, 211)
    # `step` is passed by keyword: it is keyword-only in current Optuna releases.
    step_size = trial.suggest_int("step_size", 4, 10, step=2)
    batch_size = trial.suggest_int("batch_size", 32, 2080, step=64)
    # NOTE(review): sampled on a linear scale over four orders of magnitude;
    # consider log=True -- left unchanged to keep the search space as it was.
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-2)
    gamma = trial.suggest_float("gamma", 0.5, 1)

    model = VisualBertForPreTraining.from_pretrained('uclanlp/visualbert-nlvr2-coco-pre') # this checkpoint has 1024 dimensional visual embeddings projection
    # Freeze the leading parameters. The break test runs after the flag is
    # cleared, so indices 0 .. layer_count + 1 (inclusive) end up frozen.
    for i, param in enumerate(model.parameters()):
        param.requires_grad = False
        if i > layer_count:
            break

    # Replace the output layer with a 2-class head. It is created after the
    # freeze loop, so its parameters keep requires_grad=True.
    model.cls.predictions.decoder = torch.nn.Linear(in_features=768, out_features=2, bias=True)
    model = model.to(device)

    # Only parameters that are still trainable are handed to the optimizer.
    params = [param for param in model.parameters() if param.requires_grad]

    optimizer = torch.optim.Adam(params, lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    failed = False
    try:
        # NOTE(review): the training loader iterates in dataset order (no shuffle).
        loss_history, train_history, val_history, best_model_name = train_model(
            model,
            DataLoader(features_train_dataset, batch_size=batch_size),
            DataLoader(features_val_dataset, batch_size=500),
            loss, optimizer, epoch_count, scheduler, device)

    except Exception as ex:
        print('Exception:', ex)
        failed = True

    if failed:
        # Clean up outside the except clause: while the handler runs, the
        # exception's traceback still references train_model's frame and the
        # tensors in it. Dropping this trial's objects and emptying the CUDA
        # cache keeps a failed (e.g. out-of-memory) trial from starving the next.
        del model, params, optimizer, scheduler
        torch.cuda.empty_cache()
        return 0

    return (np.mean(val_history) + np.max(val_history) + val_history[-1] + len(val_history) / epoch_count / 2) / 4


# Create a study that maximizes the value returned by objective() and run
# 200 trials. No storage is configured (the log reports an in-memory study).
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=200)

[32m[I 2022-11-07 02:27:57,852][0m A new study created in memory with name: no-name-0fb75aaf-af88-417d-8cb9-0a417fe7eb60[0m
[32m[I 2022-11-07 02:28:02,901][0m Trial 0 finished with value: 0.0 and parameters: {'layer_count': 190, 'step_size': 8, 'batch_size': 992, 'learning_rate': 0.007271698825106093, 'gamma': 0.7581052965288542}. Best is trial 0 with value: 0.0.[0m


Exception: CUDA out of memory. Tried to allocate 1.39 GiB (GPU 0; 8.00 GiB total capacity; 5.94 GiB already allocated; 116.50 MiB free; 6.15 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 35.74 sec; lr: 0.003929; Average loss: 125.08, Train accuracy: 0.5382, Val accuracy: 0.5000
Epoch: 1; 71.72 sec; lr: 0.003929; Average loss: 32.30, Train accuracy: 0.5132, Val accuracy: 0.5000
Epoch: 2; 108.03 sec; lr: 0.003929; Average loss: 15.60, Train accuracy: 0.5529, Val accuracy: 0.5000
Epoch: 3; 144.55 sec; lr: 0.003929; Average loss: 17.34, Train accuracy: 0.5352, Val accuracy: 0.5000


[32m[I 2022-11-07 02:31:06,540][0m Trial 1 finished with value: 0.3958333333333333 and parameters: {'layer_count': 189, 'step_size': 10, 'batch_size': 160, 'learning_rate': 0.003929065374073511, 'gamma': 0.6560002701832426}. Best is trial 1 with value: 0.3958333333333333.[0m


Epoch: 4; 181.31 sec; lr: 0.003929; Average loss: 11.64, Train accuracy: 0.5354, Val accuracy: 0.5000
True True             True True
pruned


[32m[I 2022-11-07 02:31:09,875][0m Trial 2 finished with value: 0.0 and parameters: {'layer_count': 207, 'step_size': 8, 'batch_size': 1376, 'learning_rate': 0.005898138589999307, 'gamma': 0.6635077170713892}. Best is trial 1 with value: 0.3958333333333333.[0m


Exception: CUDA out of memory. Tried to allocate 1.93 GiB (GPU 0; 8.00 GiB total capacity; 5.13 GiB already allocated; 0 bytes free; 7.04 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 02:31:13,461][0m Trial 3 finished with value: 0.0 and parameters: {'layer_count': 210, 'step_size': 8, 'batch_size': 1632, 'learning_rate': 0.0019835347866479923, 'gamma': 0.982362508855197}. Best is trial 1 with value: 0.3958333333333333.[0m


Exception: CUDA out of memory. Tried to allocate 848.00 MiB (GPU 0; 8.00 GiB total capacity; 5.18 GiB already allocated; 0 bytes free; 6.61 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 02:31:16,920][0m Trial 4 finished with value: 0.0 and parameters: {'layer_count': 196, 'step_size': 4, 'batch_size': 1312, 'learning_rate': 0.00829400910408946, 'gamma': 0.9155419333810964}. Best is trial 1 with value: 0.3958333333333333.[0m


Exception: CUDA out of memory. Tried to allocate 1.84 GiB (GPU 0; 8.00 GiB total capacity; 4.91 GiB already allocated; 0 bytes free; 6.61 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 02:31:20,598][0m Trial 5 finished with value: 0.0 and parameters: {'layer_count': 208, 'step_size': 4, 'batch_size': 1248, 'learning_rate': 0.0050483117901796845, 'gamma': 0.9382460662209541}. Best is trial 1 with value: 0.3958333333333333.[0m


Exception: CUDA out of memory. Tried to allocate 1.75 GiB (GPU 0; 8.00 GiB total capacity; 4.69 GiB already allocated; 0 bytes free; 6.61 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 35.92 sec; lr: 0.008007; Average loss: 796.77, Train accuracy: 0.5404, Val accuracy: 0.5000
Epoch: 1; 71.68 sec; lr: 0.008007; Average loss: 248.32, Train accuracy: 0.5449, Val accuracy: 0.5000
Epoch: 2; 107.55 sec; lr: 0.008007; Average loss: 57.55, Train accuracy: 0.5314, Val accuracy: 0.5000
Epoch: 3; 143.51 sec; lr: 0.008007; Average loss: 72.87, Train accuracy: 0.5056, Val accuracy: 0.4940
Epoch: 4; 179.40 sec; lr: 0.008007; Average loss: 25.46, Train accuracy: 0.5480, Val accuracy: 0.5000
False True             True True
Epoch: 5; 215.12 sec; lr: 0.008007; Average loss: 17.49, Train accuracy: 0.5280, Val accuracy: 0.5000
True False        

[32m[I 2022-11-07 02:36:45,100][0m Trial 6 finished with value: 0.41233266398929047 and parameters: {'layer_count': 193, 'step_size': 8, 'batch_size': 480, 'learning_rate': 0.008006653683450472, 'gamma': 0.7344458510629484}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 8; 321.93 sec; lr: 0.005880; Average loss: 9.01, Train accuracy: 0.5214, Val accuracy: 0.5000
True True             True True
pruned


[32m[I 2022-11-07 02:36:48,354][0m Trial 7 finished with value: 0.0 and parameters: {'layer_count': 203, 'step_size': 8, 'batch_size': 1120, 'learning_rate': 0.00025470692285517146, 'gamma': 0.6099614480191624}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 1.57 GiB (GPU 0; 8.00 GiB total capacity; 4.25 GiB already allocated; 0 bytes free; 6.61 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 02:36:51,625][0m Trial 8 finished with value: 0.0 and parameters: {'layer_count': 195, 'step_size': 4, 'batch_size': 800, 'learning_rate': 0.004072251568745169, 'gamma': 0.6949832115712964}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 1.12 GiB (GPU 0; 8.00 GiB total capacity; 5.27 GiB already allocated; 0 bytes free; 6.54 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 02:36:54,891][0m Trial 9 finished with value: 0.0 and parameters: {'layer_count': 191, 'step_size': 10, 'batch_size': 1760, 'learning_rate': 0.009477278848826697, 'gamma': 0.9074903469238716}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 914.00 MiB (GPU 0; 8.00 GiB total capacity; 4.78 GiB already allocated; 0 bytes free; 6.53 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.74 sec; lr: 0.006920; Average loss: 624.13, Train accuracy: 0.5364, Val accuracy: 0.5000
Epoch: 1; 67.49 sec; lr: 0.006920; Average loss: 94.36, Train accuracy: 0.5638, Val accuracy: 0.5000
Epoch: 2; 101.35 sec; lr: 0.006920; Average loss: 264.20, Train accuracy: 0.5262, Val accuracy: 0.5000
Epoch: 3; 135.25 sec; lr: 0.006920; Average loss: 53.68, Train accuracy: 0.5508, Val accuracy: 0.5000


[32m[I 2022-11-07 02:39:47,377][0m Trial 10 finished with value: 0.3958333333333333 and parameters: {'layer_count': 199, 'step_size': 6, 'batch_size': 352, 'learning_rate': 0.006919658226464542, 'gamma': 0.5304810216778443}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 169.17 sec; lr: 0.006920; Average loss: 68.56, Train accuracy: 0.5241, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 38.64 sec; lr: 0.003223; Average loss: 24.78, Train accuracy: 0.5572, Val accuracy: 0.5000
Epoch: 1; 77.06 sec; lr: 0.003223; Average loss: 9.57, Train accuracy: 0.5428, Val accuracy: 0.5000
Epoch: 2; 115.48 sec; lr: 0.003223; Average loss: 8.83, Train accuracy: 0.5461, Val accuracy: 0.5000
Epoch: 3; 153.90 sec; lr: 0.003223; Average loss: 6.18, Train accuracy: 0.5448, Val accuracy: 0.5000


[32m[I 2022-11-07 02:43:02,056][0m Trial 11 finished with value: 0.3958333333333333 and parameters: {'layer_count': 189, 'step_size': 10, 'batch_size': 32, 'learning_rate': 0.0032232030843525796, 'gamma': 0.7977926996575779}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 192.34 sec; lr: 0.003223; Average loss: 3.83, Train accuracy: 0.5490, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 35.88 sec; lr: 0.009640; Average loss: 1407.20, Train accuracy: 0.4857, Val accuracy: 0.5000
Epoch: 1; 71.42 sec; lr: 0.009640; Average loss: 205.98, Train accuracy: 0.5418, Val accuracy: 0.5000
Epoch: 2; 106.96 sec; lr: 0.009640; Average loss: 110.82, Train accuracy: 0.5198, Val accuracy: 0.4980
Epoch: 3; 142.48 sec; lr: 0.009640; Average loss: 160.51, Train accuracy: 0.5406, Val accuracy: 0.5000
Epoch: 4; 178.06 sec; lr: 0.009640; Average loss: 234.38, Train accuracy: 0.4939, Val accuracy: 0.5000
True False             True True
Epoch: 5; 213.53 sec; lr: 0.009640; Average loss: 57.54, Train accuracy: 0.5293, Val accuracy: 0.5000
True True             False True
Epoch: 6; 249.06 sec; lr: 0.009640; Average loss: 61.44, Train accuracy: 0.5078, Val accuracy: 0.5000
True True             True False


[32m[I 2022-11-07 02:47:49,050][0m Trial 12 finished with value: 0.40827058232931723 and parameters: {'layer_count': 193, 'step_size': 10, 'batch_size': 416, 'learning_rate': 0.009640339419228308, 'gamma': 0.7916784020222729}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 7; 284.62 sec; lr: 0.009640; Average loss: 34.51, Train accuracy: 0.5232, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 35.41 sec; lr: 0.009656; Average loss: 626.26, Train accuracy: 0.4993, Val accuracy: 0.5000
Epoch: 1; 70.60 sec; lr: 0.009656; Average loss: 238.28, Train accuracy: 0.5657, Val accuracy: 0.5000
Epoch: 2; 105.66 sec; lr: 0.009656; Average loss: 198.27, Train accuracy: 0.5398, Val accuracy: 0.5000
Epoch: 3; 140.78 sec; lr: 0.009656; Average loss: 92.91, Train accuracy: 0.5652, Val accuracy: 0.4980


[32m[I 2022-11-07 02:50:47,417][0m Trial 13 finished with value: 0.39513052208835336 and parameters: {'layer_count': 194, 'step_size': 6, 'batch_size': 544, 'learning_rate': 0.009655570529118106, 'gamma': 0.8214875044742345}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 175.82 sec; lr: 0.009656; Average loss: 53.31, Train accuracy: 0.5318, Val accuracy: 0.4980
True True             True True
pruned


[32m[I 2022-11-07 02:50:50,556][0m Trial 14 finished with value: 0.0 and parameters: {'layer_count': 200, 'step_size': 10, 'batch_size': 608, 'learning_rate': 0.008413268933214323, 'gamma': 0.8353653871990527}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 872.00 MiB (GPU 0; 8.00 GiB total capacity; 4.11 GiB already allocated; 0 bytes free; 6.63 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 35.75 sec; lr: 0.008080; Average loss: 654.51, Train accuracy: 0.5024, Val accuracy: 0.5000
Epoch: 1; 71.31 sec; lr: 0.008080; Average loss: 494.32, Train accuracy: 0.5482, Val accuracy: 0.5000
Epoch: 2; 106.97 sec; lr: 0.008080; Average loss: 123.18, Train accuracy: 0.5451, Val accuracy: 0.5000
Epoch: 3; 142.62 sec; lr: 0.008080; Average loss: 135.22, Train accuracy: 0.5198, Val accuracy: 0.5000


[32m[I 2022-11-07 02:53:51,218][0m Trial 15 finished with value: 0.3958333333333333 and parameters: {'layer_count': 193, 'step_size': 6, 'batch_size': 352, 'learning_rate': 0.008080242771766949, 'gamma': 0.7378884275638811}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 178.29 sec; lr: 0.008080; Average loss: 149.14, Train accuracy: 0.5572, Val accuracy: 0.5000
True True             True True
pruned


[32m[I 2022-11-07 02:53:54,675][0m Trial 16 finished with value: 0.0 and parameters: {'layer_count': 197, 'step_size': 10, 'batch_size': 2016, 'learning_rate': 0.009968170493644495, 'gamma': 0.5786727543180652}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 1.02 GiB (GPU 0; 8.00 GiB total capacity; 5.42 GiB already allocated; 0 bytes free; 7.20 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 02:53:58,178][0m Trial 17 finished with value: 0.0 and parameters: {'layer_count': 204, 'step_size': 8, 'batch_size': 800, 'learning_rate': 0.006052240570324996, 'gamma': 0.8662048818015815}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 1.12 GiB (GPU 0; 8.00 GiB total capacity; 4.87 GiB already allocated; 0 bytes free; 7.21 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 36.38 sec; lr: 0.008849; Average loss: 783.37, Train accuracy: 0.5069, Val accuracy: 0.5000
Epoch: 1; 72.51 sec; lr: 0.008849; Average loss: 311.93, Train accuracy: 0.5099, Val accuracy: 0.5000
Epoch: 2; 108.69 sec; lr: 0.008849; Average loss: 179.83, Train accuracy: 0.5240, Val accuracy: 0.5000
Epoch: 3; 144.90 sec; lr: 0.008849; Average loss: 39.14, Train accuracy: 0.5363, Val accuracy: 0.5000


[32m[I 2022-11-07 02:57:01,893][0m Trial 18 finished with value: 0.3952309236947791 and parameters: {'layer_count': 192, 'step_size': 10, 'batch_size': 352, 'learning_rate': 0.008849491301620755, 'gamma': 0.7430456234252903}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 181.11 sec; lr: 0.008849; Average loss: 123.06, Train accuracy: 0.5125, Val accuracy: 0.4980
True True             True True
pruned


[32m[I 2022-11-07 02:57:04,860][0m Trial 19 finished with value: 0.0 and parameters: {'layer_count': 198, 'step_size': 8, 'batch_size': 672, 'learning_rate': 0.00738150569919966, 'gamma': 0.7742396845676132}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 964.00 MiB (GPU 0; 8.00 GiB total capacity; 4.49 GiB already allocated; 0 bytes free; 7.21 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 02:57:07,977][0m Trial 20 finished with value: 0.0 and parameters: {'layer_count': 202, 'step_size': 6, 'batch_size': 928, 'learning_rate': 0.006269970951241684, 'gamma': 0.7056534665257855}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 1.30 GiB (GPU 0; 8.00 GiB total capacity; 5.58 GiB already allocated; 0 bytes free; 7.21 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 34.29 sec; lr: 0.007030; Average loss: 814.80, Train accuracy: 0.5222, Val accuracy: 0.5000
Epoch: 1; 68.30 sec; lr: 0.007030; Average loss: 109.18, Train accuracy: 0.5317, Val accuracy: 0.5000
Epoch: 2; 102.29 sec; lr: 0.007030; Average loss: 106.29, Train accuracy: 0.5050, Val accuracy: 0.5000
Epoch: 3; 136.31 sec; lr: 0.007030; Average loss: 176.45, Train accuracy: 0.5099, Val accuracy: 0.5000


[32m[I 2022-11-07 03:00:00,741][0m Trial 21 finished with value: 0.3946285140562249 and parameters: {'layer_count': 199, 'step_size': 6, 'batch_size': 352, 'learning_rate': 0.007030090755363396, 'gamma': 0.54988389778495}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 170.37 sec; lr: 0.007030; Average loss: 108.40, Train accuracy: 0.5177, Val accuracy: 0.4960
True True             True True
pruned
Epoch: 0; 35.43 sec; lr: 0.008993; Average loss: 306.01, Train accuracy: 0.5566, Val accuracy: 0.5000
Epoch: 1; 70.70 sec; lr: 0.008993; Average loss: 49.76, Train accuracy: 0.5253, Val accuracy: 0.5000
Epoch: 2; 105.90 sec; lr: 0.008993; Average loss: 34.31, Train accuracy: 0.5209, Val accuracy: 0.5000
Epoch: 3; 141.07 sec; lr: 0.008993; Average loss: 12.10, Train accuracy: 0.5584, Val accuracy: 0.5000


[32m[I 2022-11-07 03:02:59,266][0m Trial 22 finished with value: 0.3958333333333333 and parameters: {'layer_count': 194, 'step_size': 6, 'batch_size': 160, 'learning_rate': 0.00899257952776486, 'gamma': 0.5026081139950755}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 176.24 sec; lr: 0.008993; Average loss: 13.71, Train accuracy: 0.5559, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 36.40 sec; lr: 0.002232; Average loss: 124.18, Train accuracy: 0.5349, Val accuracy: 0.5000
Epoch: 1; 72.69 sec; lr: 0.002232; Average loss: 22.89, Train accuracy: 0.5447, Val accuracy: 0.5000
Epoch: 2; 109.00 sec; lr: 0.002232; Average loss: 7.68, Train accuracy: 0.5581, Val accuracy: 0.5000
Epoch: 3; 145.32 sec; lr: 0.002232; Average loss: 11.53, Train accuracy: 0.5536, Val accuracy: 0.5000


[32m[I 2022-11-07 03:06:03,349][0m Trial 23 finished with value: 0.3958333333333333 and parameters: {'layer_count': 189, 'step_size': 10, 'batch_size': 160, 'learning_rate': 0.0022319361744006466, 'gamma': 0.8037166218511764}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 181.71 sec; lr: 0.002232; Average loss: 9.36, Train accuracy: 0.5441, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 38.42 sec; lr: 0.002818; Average loss: 19.65, Train accuracy: 0.5438, Val accuracy: 0.5000
Epoch: 1; 76.57 sec; lr: 0.002818; Average loss: 6.37, Train accuracy: 0.5509, Val accuracy: 0.5000
Epoch: 2; 114.74 sec; lr: 0.002818; Average loss: 7.18, Train accuracy: 0.5538, Val accuracy: 0.5000
Epoch: 3; 152.90 sec; lr: 0.002818; Average loss: 6.04, Train accuracy: 0.5555, Val accuracy: 0.5000


[32m[I 2022-11-07 03:09:16,740][0m Trial 24 finished with value: 0.3958333333333333 and parameters: {'layer_count': 192, 'step_size': 10, 'batch_size': 32, 'learning_rate': 0.002817972590628229, 'gamma': 0.8726388158932242}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 191.02 sec; lr: 0.002818; Average loss: 4.28, Train accuracy: 0.5568, Val accuracy: 0.5000
True True             True True
pruned


[32m[I 2022-11-07 03:09:31,169][0m Trial 25 finished with value: 0.0 and parameters: {'layer_count': 195, 'step_size': 6, 'batch_size': 480, 'learning_rate': 0.00895795748852639, 'gamma': 0.618559584848301}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 996.00 MiB (GPU 0; 8.00 GiB total capacity; 2.58 GiB already allocated; 0 bytes free; 6.51 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 36.27 sec; lr: 0.004631; Average loss: 297.95, Train accuracy: 0.5234, Val accuracy: 0.5000
Epoch: 1; 72.30 sec; lr: 0.004631; Average loss: 56.71, Train accuracy: 0.5463, Val accuracy: 0.5000
Epoch: 2; 108.36 sec; lr: 0.004631; Average loss: 33.92, Train accuracy: 0.5380, Val accuracy: 0.5000
Epoch: 3; 144.41 sec; lr: 0.004631; Average loss: 26.65, Train accuracy: 0.5229, Val accuracy: 0.5000


[32m[I 2022-11-07 03:12:34,017][0m Trial 26 finished with value: 0.3958333333333333 and parameters: {'layer_count': 191, 'step_size': 10, 'batch_size': 224, 'learning_rate': 0.00463078506530661, 'gamma': 0.6620577305244298}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 180.46 sec; lr: 0.004631; Average loss: 23.33, Train accuracy: 0.5586, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 36.29 sec; lr: 0.005533; Average loss: 365.22, Train accuracy: 0.5809, Val accuracy: 0.5000
Epoch: 1; 72.28 sec; lr: 0.005533; Average loss: 71.02, Train accuracy: 0.5019, Val accuracy: 0.5000
Epoch: 2; 108.29 sec; lr: 0.005533; Average loss: 22.42, Train accuracy: 0.5517, Val accuracy: 0.5000
Epoch: 3; 144.34 sec; lr: 0.005533; Average loss: 93.76, Train accuracy: 0.5079, Val accuracy: 0.5000


[32m[I 2022-11-07 03:15:36,711][0m Trial 27 finished with value: 0.3958333333333333 and parameters: {'layer_count': 191, 'step_size': 8, 'batch_size': 480, 'learning_rate': 0.005532680211381976, 'gamma': 0.7044790631727034}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 180.31 sec; lr: 0.005533; Average loss: 41.36, Train accuracy: 0.5494, Val accuracy: 0.5000
True True             True True
pruned


[32m[I 2022-11-07 03:15:39,645][0m Trial 28 finished with value: 0.0 and parameters: {'layer_count': 193, 'step_size': 8, 'batch_size': 736, 'learning_rate': 0.005378050977892405, 'gamma': 0.7132249988683469}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 1.03 GiB (GPU 0; 8.00 GiB total capacity; 4.51 GiB already allocated; 0 bytes free; 6.51 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 03:15:42,669][0m Trial 29 finished with value: 0.0 and parameters: {'layer_count': 189, 'step_size': 10, 'batch_size': 928, 'learning_rate': 0.004067460403419796, 'gamma': 0.6547598322075101}. Best is trial 6 with value: 0.41233266398929047.[0m


Exception: CUDA out of memory. Tried to allocate 1.30 GiB (GPU 0; 8.00 GiB total capacity; 3.95 GiB already allocated; 0 bytes free; 6.51 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.97 sec; lr: 0.007696; Average loss: 600.26, Train accuracy: 0.5587, Val accuracy: 0.5000
Epoch: 1; 67.83 sec; lr: 0.007696; Average loss: 67.86, Train accuracy: 0.5358, Val accuracy: 0.5000
Epoch: 2; 101.71 sec; lr: 0.007696; Average loss: 38.65, Train accuracy: 0.5533, Val accuracy: 0.5000
Epoch: 3; 135.66 sec; lr: 0.007696; Average loss: 53.69, Train accuracy: 0.5434, Val accuracy: 0.5000


[32m[I 2022-11-07 03:18:34,580][0m Trial 30 finished with value: 0.3958333333333333 and parameters: {'layer_count': 197, 'step_size': 6, 'batch_size': 224, 'learning_rate': 0.0076960895936528575, 'gamma': 0.5116859910167664}. Best is trial 6 with value: 0.41233266398929047.[0m


Epoch: 4; 169.63 sec; lr: 0.007696; Average loss: 31.70, Train accuracy: 0.5324, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 36.22 sec; lr: 0.007952; Average loss: 920.22, Train accuracy: 0.5018, Val accuracy: 0.5000
Epoch: 1; 72.29 sec; lr: 0.007952; Average loss: 210.00, Train accuracy: 0.5565, Val accuracy: 0.5000
Epoch: 2; 108.32 sec; lr: 0.007952; Average loss: 77.80, Train accuracy: 0.5451, Val accuracy: 0.4980
Epoch: 3; 144.37 sec; lr: 0.007952; Average loss: 46.23, Train accuracy: 0.5393, Val accuracy: 0.5000
Epoch: 4; 180.52 sec; lr: 0.007952; Average loss: 34.67, Train accuracy: 0.5145, Val accuracy: 0.5000
True False             True True
Epoch: 5; 216.61 sec; lr: 0.007952; Average loss: 18.36, Train accuracy: 0.5441, Val accuracy: 0.4980
True True             False True
Epoch: 6; 252.71 sec; lr: 0.007952; Average loss: 16.18, Train accuracy: 0.5434, Val accuracy: 0.5000
False True             True False
Epoch: 7; 288.89 sec; lr: 0.005878; Average l

[32m[I 2022-11-07 03:33:08,670][0m Trial 31 finished with value: 0.4748117469879518 and parameters: {'layer_count': 191, 'step_size': 8, 'batch_size': 480, 'learning_rate': 0.007952087223590521, 'gamma': 0.7391815495031641}. Best is trial 31 with value: 0.4748117469879518.[0m


Epoch: 23; 871.68 sec; lr: 0.003212; Average loss: 28.26, Train accuracy: 0.4844, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 36.57 sec; lr: 0.006560; Average loss: 664.25, Train accuracy: 0.5330, Val accuracy: 0.5000
Epoch: 1; 72.97 sec; lr: 0.006560; Average loss: 414.31, Train accuracy: 0.5066, Val accuracy: 0.5000
Epoch: 2; 109.49 sec; lr: 0.006560; Average loss: 132.27, Train accuracy: 0.5359, Val accuracy: 0.5000
Epoch: 3; 145.99 sec; lr: 0.006560; Average loss: 26.49, Train accuracy: 0.5408, Val accuracy: 0.5000


[32m[I 2022-11-07 03:36:13,720][0m Trial 32 finished with value: 0.3958333333333333 and parameters: {'layer_count': 191, 'step_size': 8, 'batch_size': 480, 'learning_rate': 0.006559911590636918, 'gamma': 0.7673046485375751}. Best is trial 31 with value: 0.4748117469879518.[0m


Epoch: 4; 182.52 sec; lr: 0.006560; Average loss: 37.61, Train accuracy: 0.5262, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 36.58 sec; lr: 0.006446; Average loss: 480.35, Train accuracy: 0.5441, Val accuracy: 0.4980
Epoch: 1; 73.00 sec; lr: 0.006446; Average loss: 121.79, Train accuracy: 0.5054, Val accuracy: 0.5000
Epoch: 2; 109.52 sec; lr: 0.006446; Average loss: 67.92, Train accuracy: 0.5442, Val accuracy: 0.5000
Epoch: 3; 145.98 sec; lr: 0.006446; Average loss: 60.24, Train accuracy: 0.5777, Val accuracy: 0.5000
Epoch: 4; 182.46 sec; lr: 0.006446; Average loss: 50.65, Train accuracy: 0.5032, Val accuracy: 0.4980
True True             True False
Epoch: 5; 219.03 sec; lr: 0.006446; Average loss: 23.37, Train accuracy: 0.5152, Val accuracy: 0.5000
False True             True True
Epoch: 6; 255.56 sec; lr: 0.006446; Average loss: 28.37, Train accuracy: 0.5454, Val accuracy: 0.5000
True False             True True
Epoch: 7; 292.05 sec; lr: 0.004947; Average lo

[32m[I 2022-11-07 03:42:21,187][0m Trial 33 finished with value: 0.41596385542168673 and parameters: {'layer_count': 191, 'step_size': 8, 'batch_size': 480, 'learning_rate': 0.006446406143406529, 'gamma': 0.767419785799724}. Best is trial 31 with value: 0.4748117469879518.[0m


Epoch: 9; 365.16 sec; lr: 0.004947; Average loss: 12.95, Train accuracy: 0.5030, Val accuracy: 0.4980
True True             True True
pruned


[32m[I 2022-11-07 03:42:24,312][0m Trial 34 finished with value: 0.0 and parameters: {'layer_count': 193, 'step_size': 8, 'batch_size': 608, 'learning_rate': 0.007668820120581481, 'gamma': 0.7799410133454869}. Best is trial 31 with value: 0.4748117469879518.[0m


Exception: CUDA out of memory. Tried to allocate 872.00 MiB (GPU 0; 8.00 GiB total capacity; 4.11 GiB already allocated; 0 bytes free; 6.51 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 03:42:27,783][0m Trial 35 finished with value: 0.0 and parameters: {'layer_count': 190, 'step_size': 8, 'batch_size': 1056, 'learning_rate': 0.00840639274428015, 'gamma': 0.7416052885999215}. Best is trial 31 with value: 0.4748117469879518.[0m


Exception: CUDA out of memory. Tried to allocate 1.48 GiB (GPU 0; 8.00 GiB total capacity; 6.29 GiB already allocated; 0 bytes free; 7.24 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 03:42:31,046][0m Trial 36 finished with value: 0.0 and parameters: {'layer_count': 195, 'step_size': 8, 'batch_size': 1504, 'learning_rate': 0.009240357847222316, 'gamma': 0.8459871258324846}. Best is trial 31 with value: 0.4748117469879518.[0m


Exception: CUDA out of memory. Tried to allocate 2.11 GiB (GPU 0; 8.00 GiB total capacity; 5.57 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 03:42:34,124][0m Trial 37 finished with value: 0.0 and parameters: {'layer_count': 196, 'step_size': 8, 'batch_size': 736, 'learning_rate': 0.007955202741899035, 'gamma': 0.6818075573023006}. Best is trial 31 with value: 0.4748117469879518.[0m


Exception: CUDA out of memory. Tried to allocate 1.03 GiB (GPU 0; 8.00 GiB total capacity; 4.51 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 03:42:37,255][0m Trial 38 finished with value: 0.0 and parameters: {'layer_count': 190, 'step_size': 8, 'batch_size': 864, 'learning_rate': 0.006913565087703691, 'gamma': 0.7285119920680865}. Best is trial 31 with value: 0.4748117469879518.[0m


Exception: CUDA out of memory. Tried to allocate 1.21 GiB (GPU 0; 8.00 GiB total capacity; 5.22 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 03:42:40,336][0m Trial 39 finished with value: 0.0 and parameters: {'layer_count': 211, 'step_size': 8, 'batch_size': 1120, 'learning_rate': 0.008754681837832382, 'gamma': 0.799094825784786}. Best is trial 31 with value: 0.4748117469879518.[0m


Exception: CUDA out of memory. Tried to allocate 1.57 GiB (GPU 0; 8.00 GiB total capacity; 4.25 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.86 sec; lr: 0.007474; Average loss: 283.14, Train accuracy: 0.5453, Val accuracy: 0.5000
Epoch: 1; 67.70 sec; lr: 0.007474; Average loss: 129.81, Train accuracy: 0.5417, Val accuracy: 0.5000
Epoch: 2; 101.61 sec; lr: 0.007474; Average loss: 74.94, Train accuracy: 0.5689, Val accuracy: 0.5080
Epoch: 3; 135.56 sec; lr: 0.007474; Average loss: 42.43, Train accuracy: 0.5718, Val accuracy: 0.4980
Epoch: 4; 169.53 sec; lr: 0.007474; Average loss: 105.09, Train accuracy: 0.5423, Val accuracy: 0.5301
False True             False True
Epoch: 5; 203.53 sec; lr: 0.007474; Average loss: 88.78, Train accuracy: 0.5506, Val accuracy: 0.4980
True False      

[32m[I 2022-11-07 03:59:44,439][0m Trial 40 finished with value: 0.5188587684069612 and parameters: {'layer_count': 206, 'step_size': 8, 'batch_size': 288, 'learning_rate': 0.007474153509065735, 'gamma': 0.6333717907795332}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1021.59 sec; lr: 0.001899; Average loss: 26.11, Train accuracy: 0.5598, Val accuracy: 0.5120
False True             True False
Epoch: 0; 34.39 sec; lr: 0.007396; Average loss: 304.42, Train accuracy: 0.5604, Val accuracy: 0.5000
Epoch: 1; 68.52 sec; lr: 0.007396; Average loss: 160.94, Train accuracy: 0.5423, Val accuracy: 0.4980
Epoch: 2; 102.61 sec; lr: 0.007396; Average loss: 106.38, Train accuracy: 0.5673, Val accuracy: 0.5080
Epoch: 3; 136.71 sec; lr: 0.007396; Average loss: 53.87, Train accuracy: 0.5823, Val accuracy: 0.5100
Epoch: 4; 170.82 sec; lr: 0.007396; Average loss: 54.16, Train accuracy: 0.5662, Val accuracy: 0.5020
True False             False True
Epoch: 5; 204.91 sec; lr: 0.007396; Average loss: 75.81, Train accuracy: 0.5672, Val accuracy: 0.5161
False True             False False
Epoch: 6; 239.08 sec; lr: 0.007396; Average loss: 35.75, Train accuracy: 0.5938, Val accuracy: 0.4960
True False             True False
Epoch: 7; 273.17 sec; lr: 0.004547; Average 

[32m[I 2022-11-07 04:16:50,565][0m Trial 41 finished with value: 0.5159136546184738 and parameters: {'layer_count': 206, 'step_size': 8, 'batch_size': 288, 'learning_rate': 0.007396263122868352, 'gamma': 0.6147441999216842}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1023.72 sec; lr: 0.001718; Average loss: 21.77, Train accuracy: 0.5757, Val accuracy: 0.4960
True True             False True
Epoch: 0; 34.39 sec; lr: 0.006534; Average loss: 205.63, Train accuracy: 0.5677, Val accuracy: 0.5000
Epoch: 1; 68.64 sec; lr: 0.006534; Average loss: 104.05, Train accuracy: 0.5586, Val accuracy: 0.5100
Epoch: 2; 102.79 sec; lr: 0.006534; Average loss: 56.69, Train accuracy: 0.5591, Val accuracy: 0.5000
Epoch: 3; 136.93 sec; lr: 0.006534; Average loss: 125.81, Train accuracy: 0.5423, Val accuracy: 0.4960
Epoch: 4; 171.09 sec; lr: 0.006534; Average loss: 76.45, Train accuracy: 0.5663, Val accuracy: 0.4980
False True             True False
Epoch: 5; 205.22 sec; lr: 0.006534; Average loss: 66.06, Train accuracy: 0.5749, Val accuracy: 0.5020
False False             True True
Epoch: 6; 239.35 sec; lr: 0.006534; Average loss: 48.44, Train accuracy: 0.5789, Val accuracy: 0.5060
False False             False True
Epoch: 7; 273.53 sec; lr: 0.004004; Average l

[32m[I 2022-11-07 04:33:57,435][0m Trial 42 finished with value: 0.5098226238286478 and parameters: {'layer_count': 207, 'step_size': 8, 'batch_size': 288, 'learning_rate': 0.006534126220241599, 'gamma': 0.6128106859344026}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1024.57 sec; lr: 0.001504; Average loss: 13.21, Train accuracy: 0.5903, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 34.41 sec; lr: 0.006469; Average loss: 200.04, Train accuracy: 0.5650, Val accuracy: 0.5000
Epoch: 1; 68.57 sec; lr: 0.006469; Average loss: 63.00, Train accuracy: 0.5590, Val accuracy: 0.5261
Epoch: 2; 102.75 sec; lr: 0.006469; Average loss: 32.04, Train accuracy: 0.5829, Val accuracy: 0.5040
Epoch: 3; 136.88 sec; lr: 0.006469; Average loss: 71.38, Train accuracy: 0.5489, Val accuracy: 0.5442
Epoch: 4; 171.00 sec; lr: 0.006469; Average loss: 33.05, Train accuracy: 0.5749, Val accuracy: 0.5100
True False             True False
Epoch: 5; 205.14 sec; lr: 0.006469; Average loss: 34.21, Train accuracy: 0.5790, Val accuracy: 0.5281
False True             False True
Epoch: 6; 239.25 sec; lr: 0.006469; Average loss: 77.99, Train accuracy: 0.5606, Val accuracy: 0.5000
True False             True False
Epoch: 7; 273.46 sec; lr: 0.004022; Averag

[32m[I 2022-11-07 04:51:04,513][0m Trial 43 finished with value: 0.5181726907630522 and parameters: {'layer_count': 207, 'step_size': 8, 'batch_size': 288, 'learning_rate': 0.006469173177835, 'gamma': 0.6217257152730571}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1024.49 sec; lr: 0.001555; Average loss: 17.84, Train accuracy: 0.5812, Val accuracy: 0.4980
False True             False True
Epoch: 0; 34.33 sec; lr: 0.005845; Average loss: 148.09, Train accuracy: 0.5624, Val accuracy: 0.5000
Epoch: 1; 68.59 sec; lr: 0.005845; Average loss: 72.58, Train accuracy: 0.5527, Val accuracy: 0.4980
Epoch: 2; 102.81 sec; lr: 0.005845; Average loss: 70.62, Train accuracy: 0.5567, Val accuracy: 0.4980
Epoch: 3; 137.03 sec; lr: 0.005845; Average loss: 48.10, Train accuracy: 0.5800, Val accuracy: 0.5000
Epoch: 4; 171.23 sec; lr: 0.005845; Average loss: 30.10, Train accuracy: 0.5808, Val accuracy: 0.5000
True False             True True
Epoch: 5; 205.44 sec; lr: 0.005845; Average loss: 65.66, Train accuracy: 0.5684, Val accuracy: 0.4960
True True             False True
Epoch: 6; 239.64 sec; lr: 0.005845; Average loss: 70.15, Train accuracy: 0.5624, Val accuracy: 0.4960
True True             True False
Epoch: 7; 273.84 sec; lr: 0.003695; Average loss: 

[32m[I 2022-11-07 05:08:14,012][0m Trial 44 finished with value: 0.5126338688085675 and parameters: {'layer_count': 207, 'step_size': 8, 'batch_size': 160, 'learning_rate': 0.005845097922003557, 'gamma': 0.632176185164379}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1027.08 sec; lr: 0.001477; Average loss: 7.24, Train accuracy: 0.6137, Val accuracy: 0.5241
False False             False True
Epoch: 0; 34.27 sec; lr: 0.004738; Average loss: 160.86, Train accuracy: 0.5491, Val accuracy: 0.5040
Epoch: 1; 68.44 sec; lr: 0.004738; Average loss: 50.33, Train accuracy: 0.5513, Val accuracy: 0.5301
Epoch: 2; 102.56 sec; lr: 0.004738; Average loss: 21.69, Train accuracy: 0.5884, Val accuracy: 0.4980
Epoch: 3; 136.69 sec; lr: 0.004738; Average loss: 48.96, Train accuracy: 0.5588, Val accuracy: 0.4940
Epoch: 4; 170.83 sec; lr: 0.004738; Average loss: 56.21, Train accuracy: 0.5512, Val accuracy: 0.5361
False True             True False
Epoch: 5; 204.97 sec; lr: 0.004738; Average loss: 19.86, Train accuracy: 0.5929, Val accuracy: 0.5060
True False             True True
Epoch: 6; 239.10 sec; lr: 0.004738; Average loss: 51.55, Train accuracy: 0.5540, Val accuracy: 0.5201
False True             False True
Epoch: 7; 273.31 sec; lr: 0.002959; Average loss

[32m[I 2022-11-07 05:25:21,377][0m Trial 45 finished with value: 0.5146251673360107 and parameters: {'layer_count': 207, 'step_size': 8, 'batch_size': 224, 'learning_rate': 0.004738075724114614, 'gamma': 0.6245337476581904}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1024.91 sec; lr: 0.001154; Average loss: 9.79, Train accuracy: 0.5939, Val accuracy: 0.5040
True True             True True
pruned
Epoch: 0; 35.71 sec; lr: 0.004704; Average loss: 67.78, Train accuracy: 0.5768, Val accuracy: 0.5000
Epoch: 1; 71.05 sec; lr: 0.004704; Average loss: 59.81, Train accuracy: 0.5795, Val accuracy: 0.5000
Epoch: 2; 106.44 sec; lr: 0.004704; Average loss: 47.31, Train accuracy: 0.5839, Val accuracy: 0.5000
Epoch: 3; 141.84 sec; lr: 0.004704; Average loss: 53.52, Train accuracy: 0.5892, Val accuracy: 0.5000


[32m[I 2022-11-07 05:28:21,018][0m Trial 46 finished with value: 0.3952309236947791 and parameters: {'layer_count': 205, 'step_size': 8, 'batch_size': 32, 'learning_rate': 0.004703847700126, 'gamma': 0.6334044796998741}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 4; 177.26 sec; lr: 0.004704; Average loss: 58.53, Train accuracy: 0.5884, Val accuracy: 0.4980
True True             True True
pruned
Epoch: 0; 34.50 sec; lr: 0.005530; Average loss: 121.53, Train accuracy: 0.5650, Val accuracy: 0.4960
Epoch: 1; 68.86 sec; lr: 0.005530; Average loss: 56.90, Train accuracy: 0.5709, Val accuracy: 0.4960
Epoch: 2; 103.22 sec; lr: 0.005530; Average loss: 35.43, Train accuracy: 0.5768, Val accuracy: 0.5000
Epoch: 3; 137.60 sec; lr: 0.005530; Average loss: 35.58, Train accuracy: 0.5806, Val accuracy: 0.5000
Epoch: 4; 171.98 sec; lr: 0.005530; Average loss: 59.31, Train accuracy: 0.5709, Val accuracy: 0.5000
True True             False True
Epoch: 5; 206.34 sec; lr: 0.005530; Average loss: 53.24, Train accuracy: 0.5807, Val accuracy: 0.5000
True True             True False
Epoch: 6; 240.72 sec; lr: 0.005530; Average loss: 53.86, Train accuracy: 0.5796, Val accuracy: 0.5181
False True             True True
Epoch: 7; 275.07 sec; lr: 0.003219; Average los

[32m[I 2022-11-07 05:45:34,990][0m Trial 47 finished with value: 0.503396921017403 and parameters: {'layer_count': 209, 'step_size': 8, 'batch_size': 96, 'learning_rate': 0.005530039777552087, 'gamma': 0.5820248727045539}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1031.42 sec; lr: 0.001090; Average loss: 17.13, Train accuracy: 0.5729, Val accuracy: 0.4960
False True             True False
Epoch: 0; 34.43 sec; lr: 0.000584; Average loss: 36.19, Train accuracy: 0.5474, Val accuracy: 0.5060
Epoch: 1; 68.72 sec; lr: 0.000584; Average loss: 19.24, Train accuracy: 0.5494, Val accuracy: 0.5321
Epoch: 2; 102.92 sec; lr: 0.000584; Average loss: 14.81, Train accuracy: 0.5552, Val accuracy: 0.5261
Epoch: 3; 137.10 sec; lr: 0.000584; Average loss: 13.43, Train accuracy: 0.5549, Val accuracy: 0.5281
Epoch: 4; 171.28 sec; lr: 0.000584; Average loss: 12.43, Train accuracy: 0.5577, Val accuracy: 0.5201
True False             True False
Epoch: 5; 205.43 sec; lr: 0.000584; Average loss: 10.83, Train accuracy: 0.5597, Val accuracy: 0.5100
True True             False True
Epoch: 6; 239.58 sec; lr: 0.000584; Average loss: 11.92, Train accuracy: 0.5558, Val accuracy: 0.5080
True True             True False


[32m[I 2022-11-07 05:50:11,147][0m Trial 48 finished with value: 0.4221385542168674 and parameters: {'layer_count': 206, 'step_size': 8, 'batch_size': 224, 'learning_rate': 0.000583788904784186, 'gamma': 0.5855758348276209}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 7; 273.73 sec; lr: 0.000342; Average loss: 11.34, Train accuracy: 0.5677, Val accuracy: 0.5060
True True             True True
pruned
Epoch: 0; 34.43 sec; lr: 0.003533; Average loss: 45.79, Train accuracy: 0.5685, Val accuracy: 0.4980
Epoch: 1; 68.68 sec; lr: 0.003533; Average loss: 44.57, Train accuracy: 0.5676, Val accuracy: 0.5000
Epoch: 2; 102.96 sec; lr: 0.003533; Average loss: 49.71, Train accuracy: 0.5746, Val accuracy: 0.5201
Epoch: 3; 137.32 sec; lr: 0.003533; Average loss: 17.42, Train accuracy: 0.5857, Val accuracy: 0.4980
Epoch: 4; 171.57 sec; lr: 0.003533; Average loss: 29.54, Train accuracy: 0.5781, Val accuracy: 0.4960
True True             False False
Epoch: 5; 205.84 sec; lr: 0.003533; Average loss: 45.36, Train accuracy: 0.5588, Val accuracy: 0.5100
False True             True False
Epoch: 6; 240.11 sec; lr: 0.003533; Average loss: 26.24, Train accuracy: 0.5716, Val accuracy: 0.5020
True False             True True
Epoch: 7; 274.36 sec; lr: 0.002265; Average lo

[32m[I 2022-11-07 06:07:21,688][0m Trial 49 finished with value: 0.5137215528781793 and parameters: {'layer_count': 208, 'step_size': 8, 'batch_size': 160, 'learning_rate': 0.0035327379786630475, 'gamma': 0.6412441748276555}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1028.16 sec; lr: 0.000931; Average loss: 11.81, Train accuracy: 0.5779, Val accuracy: 0.5000
True True             False True
Epoch: 0; 34.54 sec; lr: 0.003511; Average loss: 96.45, Train accuracy: 0.5517, Val accuracy: 0.5301
Epoch: 1; 68.76 sec; lr: 0.003511; Average loss: 35.77, Train accuracy: 0.5673, Val accuracy: 0.5442
Epoch: 2; 102.97 sec; lr: 0.003511; Average loss: 27.50, Train accuracy: 0.5751, Val accuracy: 0.5060
Epoch: 3; 137.16 sec; lr: 0.003511; Average loss: 62.81, Train accuracy: 0.5617, Val accuracy: 0.5000
Epoch: 4; 171.34 sec; lr: 0.003511; Average loss: 60.69, Train accuracy: 0.5668, Val accuracy: 0.4980
True True             True False
Epoch: 5; 205.53 sec; lr: 0.003511; Average loss: 38.15, Train accuracy: 0.5901, Val accuracy: 0.5100
False True             True True
Epoch: 6; 239.79 sec; lr: 0.003511; Average loss: 32.34, Train accuracy: 0.5607, Val accuracy: 0.5020
True False             True True
Epoch: 7; 273.99 sec; lr: 0.001944; Average loss: 40

[32m[I 2022-11-07 06:17:05,950][0m Trial 50 finished with value: 0.459210370895346 and parameters: {'layer_count': 209, 'step_size': 8, 'batch_size': 288, 'learning_rate': 0.0035106931363031264, 'gamma': 0.5537088841799227}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 16; 581.78 sec; lr: 0.001076; Average loss: 19.95, Train accuracy: 0.5749, Val accuracy: 0.4980
True True             True True
pruned
Epoch: 0; 34.44 sec; lr: 0.004361; Average loss: 108.24, Train accuracy: 0.5768, Val accuracy: 0.5000
Epoch: 1; 68.81 sec; lr: 0.004361; Average loss: 27.06, Train accuracy: 0.5743, Val accuracy: 0.5000
Epoch: 2; 103.15 sec; lr: 0.004361; Average loss: 50.29, Train accuracy: 0.5621, Val accuracy: 0.5000
Epoch: 3; 137.50 sec; lr: 0.004361; Average loss: 70.97, Train accuracy: 0.5787, Val accuracy: 0.4980
Epoch: 4; 171.85 sec; lr: 0.004361; Average loss: 43.97, Train accuracy: 0.5919, Val accuracy: 0.5000
False True             True True
Epoch: 5; 206.20 sec; lr: 0.004361; Average loss: 30.88, Train accuracy: 0.5884, Val accuracy: 0.4980
True False             True True
Epoch: 6; 240.56 sec; lr: 0.004361; Average loss: 58.49, Train accuracy: 0.5749, Val accuracy: 0.5000
False True             False True
Epoch: 7; 274.92 sec; lr: 0.002802; Average l

[32m[I 2022-11-07 06:29:44,681][0m Trial 51 finished with value: 0.47515516611902153 and parameters: {'layer_count': 207, 'step_size': 8, 'batch_size': 96, 'learning_rate': 0.004361384475348413, 'gamma': 0.6423451609884747}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 21; 756.26 sec; lr: 0.001800; Average loss: 18.23, Train accuracy: 0.5985, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 34.50 sec; lr: 0.005147; Average loss: 192.57, Train accuracy: 0.5423, Val accuracy: 0.5040
Epoch: 1; 68.67 sec; lr: 0.005147; Average loss: 67.13, Train accuracy: 0.5594, Val accuracy: 0.5020
Epoch: 2; 102.86 sec; lr: 0.005147; Average loss: 37.19, Train accuracy: 0.5715, Val accuracy: 0.5120
Epoch: 3; 137.11 sec; lr: 0.005147; Average loss: 43.80, Train accuracy: 0.5664, Val accuracy: 0.5000
Epoch: 4; 171.31 sec; lr: 0.005147; Average loss: 131.76, Train accuracy: 0.5380, Val accuracy: 0.5442
False True             False True
Epoch: 5; 205.50 sec; lr: 0.005147; Average loss: 17.46, Train accuracy: 0.6092, Val accuracy: 0.5020
True False             True False
Epoch: 6; 239.69 sec; lr: 0.005147; Average loss: 55.11, Train accuracy: 0.5457, Val accuracy: 0.5261
False True             False True
Epoch: 7; 273.88 sec; lr: 0.003519; Averag

[32m[I 2022-11-07 06:46:53,685][0m Trial 52 finished with value: 0.5146084337349397 and parameters: {'layer_count': 208, 'step_size': 8, 'batch_size': 288, 'learning_rate': 0.005147238253619593, 'gamma': 0.6835956762960238}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1026.01 sec; lr: 0.001644; Average loss: 21.47, Train accuracy: 0.5695, Val accuracy: 0.5020
True True             True True
pruned
Epoch: 0; 34.44 sec; lr: 0.005002; Average loss: 210.25, Train accuracy: 0.5575, Val accuracy: 0.5000
Epoch: 1; 68.64 sec; lr: 0.005002; Average loss: 114.98, Train accuracy: 0.5363, Val accuracy: 0.4920
Epoch: 2; 102.82 sec; lr: 0.005002; Average loss: 56.31, Train accuracy: 0.5540, Val accuracy: 0.5000
Epoch: 3; 136.99 sec; lr: 0.005002; Average loss: 52.55, Train accuracy: 0.5689, Val accuracy: 0.5000
Epoch: 4; 171.18 sec; lr: 0.005002; Average loss: 95.25, Train accuracy: 0.5573, Val accuracy: 0.4980
True True             False True
Epoch: 5; 205.36 sec; lr: 0.005002; Average loss: 80.91, Train accuracy: 0.5626, Val accuracy: 0.5000
False True             True False
Epoch: 6; 239.61 sec; lr: 0.005002; Average loss: 83.19, Train accuracy: 0.5660, Val accuracy: 0.5000
True False             True True
Epoch: 7; 273.79 sec; lr: 0.003421; Average

[32m[I 2022-11-07 07:04:01,809][0m Trial 53 finished with value: 0.5141231593038822 and parameters: {'layer_count': 209, 'step_size': 8, 'batch_size': 288, 'learning_rate': 0.005002397778235457, 'gamma': 0.6839308698835546}. Best is trial 40 with value: 0.5188587684069612.[0m


Epoch: 29; 1025.78 sec; lr: 0.001600; Average loss: 12.29, Train accuracy: 0.5977, Val accuracy: 0.5060
True False             True True
Epoch: 0; 34.44 sec; lr: 0.005323; Average loss: 219.95, Train accuracy: 0.5645, Val accuracy: 0.4980
Epoch: 1; 68.60 sec; lr: 0.005323; Average loss: 123.92, Train accuracy: 0.5362, Val accuracy: 0.5100
Epoch: 2; 102.79 sec; lr: 0.005323; Average loss: 44.23, Train accuracy: 0.5546, Val accuracy: 0.5181
Epoch: 3; 136.97 sec; lr: 0.003637; Average loss: 32.17, Train accuracy: 0.5716, Val accuracy: 0.5301
Epoch: 4; 171.14 sec; lr: 0.003637; Average loss: 24.16, Train accuracy: 0.5914, Val accuracy: 0.4960
True False             False False
Epoch: 5; 205.27 sec; lr: 0.003637; Average loss: 43.03, Train accuracy: 0.5556, Val accuracy: 0.4980
False True             False False
Epoch: 6; 239.46 sec; lr: 0.003637; Average loss: 51.65, Train accuracy: 0.5547, Val accuracy: 0.5281
False False             True False
Epoch: 7; 273.62 sec; lr: 0.002486; Average 

[32m[I 2022-11-07 07:21:09,945][0m Trial 54 finished with value: 0.5353580990629183 and parameters: {'layer_count': 211, 'step_size': 4, 'batch_size': 288, 'learning_rate': 0.005322610593735478, 'gamma': 0.6833989196012462}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1025.63 sec; lr: 0.000371; Average loss: 6.81, Train accuracy: 0.5958, Val accuracy: 0.5522
True False             False False
Epoch: 0; 34.42 sec; lr: 0.005139; Average loss: 341.13, Train accuracy: 0.5248, Val accuracy: 0.5000
Epoch: 1; 68.61 sec; lr: 0.005139; Average loss: 105.48, Train accuracy: 0.5753, Val accuracy: 0.5181
Epoch: 2; 102.82 sec; lr: 0.005139; Average loss: 58.92, Train accuracy: 0.5554, Val accuracy: 0.5020
Epoch: 3; 136.99 sec; lr: 0.003466; Average loss: 22.97, Train accuracy: 0.5971, Val accuracy: 0.5201
Epoch: 4; 171.13 sec; lr: 0.003466; Average loss: 23.54, Train accuracy: 0.5720, Val accuracy: 0.5261
False False             True False
Epoch: 5; 205.33 sec; lr: 0.003466; Average loss: 24.60, Train accuracy: 0.5743, Val accuracy: 0.5201
True False             False True
Epoch: 6; 239.49 sec; lr: 0.003466; Average loss: 25.39, Train accuracy: 0.5738, Val accuracy: 0.5020
True True             False False
Epoch: 7; 273.68 sec; lr: 0.002337; Average l

[32m[I 2022-11-07 07:38:17,359][0m Trial 55 finished with value: 0.5162817938420348 and parameters: {'layer_count': 211, 'step_size': 4, 'batch_size': 608, 'learning_rate': 0.005138505418050356, 'gamma': 0.6744429510548986}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1024.92 sec; lr: 0.000326; Average loss: 6.16, Train accuracy: 0.5852, Val accuracy: 0.5100
False True             False True
Epoch: 0; 34.25 sec; lr: 0.007306; Average loss: 302.36, Train accuracy: 0.5534, Val accuracy: 0.5241
Epoch: 1; 68.42 sec; lr: 0.007306; Average loss: 123.99, Train accuracy: 0.5237, Val accuracy: 0.4940
Epoch: 2; 102.51 sec; lr: 0.007306; Average loss: 108.00, Train accuracy: 0.5293, Val accuracy: 0.4980
Epoch: 3; 136.62 sec; lr: 0.004370; Average loss: 137.58, Train accuracy: 0.5175, Val accuracy: 0.5341
Epoch: 4; 170.75 sec; lr: 0.004370; Average loss: 37.17, Train accuracy: 0.5904, Val accuracy: 0.5120
True False             False True
Epoch: 5; 204.93 sec; lr: 0.004370; Average loss: 55.61, Train accuracy: 0.5696, Val accuracy: 0.5040
True True             False False
Epoch: 6; 239.05 sec; lr: 0.004370; Average loss: 58.48, Train accuracy: 0.5675, Val accuracy: 0.5040
True True             True False


[32m[I 2022-11-07 07:42:52,908][0m Trial 56 finished with value: 0.4196285140562249 and parameters: {'layer_count': 211, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.0073064688632113696, 'gamma': 0.5980431319300834}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 7; 273.18 sec; lr: 0.002613; Average loss: 62.63, Train accuracy: 0.5579, Val accuracy: 0.5020
True True             True True
pruned
Epoch: 0; 34.41 sec; lr: 0.005897; Average loss: 330.12, Train accuracy: 0.5149, Val accuracy: 0.5000
Epoch: 1; 68.57 sec; lr: 0.005897; Average loss: 96.40, Train accuracy: 0.5561, Val accuracy: 0.5281
Epoch: 2; 102.73 sec; lr: 0.005897; Average loss: 25.57, Train accuracy: 0.6056, Val accuracy: 0.5040
Epoch: 3; 136.93 sec; lr: 0.003291; Average loss: 74.36, Train accuracy: 0.5397, Val accuracy: 0.5020
Epoch: 4; 171.06 sec; lr: 0.003291; Average loss: 23.57, Train accuracy: 0.5825, Val accuracy: 0.5422
False True             True False
Epoch: 5; 205.23 sec; lr: 0.003291; Average loss: 12.74, Train accuracy: 0.6033, Val accuracy: 0.4980
True False             True True
Epoch: 6; 239.36 sec; lr: 0.003291; Average loss: 38.48, Train accuracy: 0.5483, Val accuracy: 0.5181
False True             False True
Epoch: 7; 273.54 sec; lr: 0.001837; Average l

[32m[I 2022-11-07 08:00:00,086][0m Trial 57 finished with value: 0.5125334672021419 and parameters: {'layer_count': 210, 'step_size': 4, 'batch_size': 608, 'learning_rate': 0.005896909897918783, 'gamma': 0.5581445124962385}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1024.83 sec; lr: 0.000100; Average loss: 6.91, Train accuracy: 0.5957, Val accuracy: 0.4940
True False             True True
Epoch: 0; 34.48 sec; lr: 0.006870; Average loss: 365.18, Train accuracy: 0.5590, Val accuracy: 0.5000
Epoch: 1; 68.78 sec; lr: 0.006870; Average loss: 147.17, Train accuracy: 0.5333, Val accuracy: 0.4960
Epoch: 2; 103.17 sec; lr: 0.006870; Average loss: 56.70, Train accuracy: 0.5655, Val accuracy: 0.5120
Epoch: 3; 137.46 sec; lr: 0.004137; Average loss: 41.68, Train accuracy: 0.5679, Val accuracy: 0.5000
Epoch: 4; 171.78 sec; lr: 0.004137; Average loss: 63.02, Train accuracy: 0.5430, Val accuracy: 0.5482
False True             False True
Epoch: 5; 206.09 sec; lr: 0.004137; Average loss: 14.95, Train accuracy: 0.6067, Val accuracy: 0.4920
True False             True False
Epoch: 6; 240.46 sec; lr: 0.004137; Average loss: 16.71, Train accuracy: 0.5642, Val accuracy: 0.5060
False True             False True
Epoch: 7; 274.76 sec; lr: 0.002491; Average loss

[32m[I 2022-11-07 08:17:12,338][0m Trial 58 finished with value: 0.5222222222222223 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.006870472237485309, 'gamma': 0.6021629120520136}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1029.77 sec; lr: 0.000197; Average loss: 4.16, Train accuracy: 0.5862, Val accuracy: 0.5261
False True             False True
Epoch: 0; 34.61 sec; lr: 0.006989; Average loss: 892.94, Train accuracy: 0.5594, Val accuracy: 0.5000
Epoch: 1; 69.03 sec; lr: 0.006989; Average loss: 497.54, Train accuracy: 0.5080, Val accuracy: 0.5020
Epoch: 2; 103.45 sec; lr: 0.006989; Average loss: 279.91, Train accuracy: 0.5118, Val accuracy: 0.5020
Epoch: 3; 137.97 sec; lr: 0.004187; Average loss: 68.54, Train accuracy: 0.5183, Val accuracy: 0.5000
Epoch: 4; 172.39 sec; lr: 0.004187; Average loss: 112.24, Train accuracy: 0.5198, Val accuracy: 0.5000
True True             True False
Epoch: 5; 206.81 sec; lr: 0.004187; Average loss: 128.79, Train accuracy: 0.4941, Val accuracy: 0.5161
False True             True True
Epoch: 6; 241.21 sec; lr: 0.004187; Average loss: 57.85, Train accuracy: 0.5307, Val accuracy: 0.5000
True False             True True
Epoch: 7; 275.72 sec; lr: 0.002509; Average los

[32m[I 2022-11-07 08:22:59,389][0m Trial 59 finished with value: 0.4195281124497992 and parameters: {'layer_count': 201, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.006988547254049576, 'gamma': 0.5991596295839577}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 9; 344.56 sec; lr: 0.002509; Average loss: 10.79, Train accuracy: 0.5406, Val accuracy: 0.4940
True True             True True
pruned
Epoch: 0; 34.34 sec; lr: 0.006202; Average loss: 327.92, Train accuracy: 0.5345, Val accuracy: 0.5000
Epoch: 1; 68.60 sec; lr: 0.006202; Average loss: 115.87, Train accuracy: 0.5455, Val accuracy: 0.5020
Epoch: 2; 102.82 sec; lr: 0.006202; Average loss: 46.10, Train accuracy: 0.5558, Val accuracy: 0.4960
Epoch: 3; 137.12 sec; lr: 0.006186; Average loss: 39.22, Train accuracy: 0.5633, Val accuracy: 0.4880
Epoch: 4; 171.33 sec; lr: 0.006186; Average loss: 49.27, Train accuracy: 0.5669, Val accuracy: 0.4920
False True             True False
Epoch: 5; 205.53 sec; lr: 0.006186; Average loss: 61.14, Train accuracy: 0.5380, Val accuracy: 0.4940
False False             True True
Epoch: 6; 239.83 sec; lr: 0.006186; Average loss: 48.24, Train accuracy: 0.5383, Val accuracy: 0.4940
True False             False True
Epoch: 7; 274.02 sec; lr: 0.006169; Average

[32m[I 2022-11-07 08:40:09,137][0m Trial 60 finished with value: 0.5097389558232932 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.006202170383002161, 'gamma': 0.9973221185486505}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1027.15 sec; lr: 0.006087; Average loss: 16.55, Train accuracy: 0.5978, Val accuracy: 0.4940
True False             True False
Epoch: 0; 34.38 sec; lr: 0.006814; Average loss: 320.58, Train accuracy: 0.5338, Val accuracy: 0.5120
Epoch: 1; 68.59 sec; lr: 0.006814; Average loss: 29.18, Train accuracy: 0.5867, Val accuracy: 0.4920
Epoch: 2; 102.89 sec; lr: 0.006814; Average loss: 72.17, Train accuracy: 0.5659, Val accuracy: 0.4980
Epoch: 3; 137.10 sec; lr: 0.004550; Average loss: 57.83, Train accuracy: 0.5642, Val accuracy: 0.5000
Epoch: 4; 171.30 sec; lr: 0.004550; Average loss: 77.64, Train accuracy: 0.5577, Val accuracy: 0.4940
True False             False True
Epoch: 5; 205.50 sec; lr: 0.004550; Average loss: 57.36, Train accuracy: 0.5682, Val accuracy: 0.5060
False True             False False
Epoch: 6; 239.72 sec; lr: 0.004550; Average loss: 41.49, Train accuracy: 0.5764, Val accuracy: 0.5080
False False             True False
Epoch: 7; 273.94 sec; lr: 0.003038; Average l

[32m[I 2022-11-07 08:57:19,287][0m Trial 61 finished with value: 0.5227576974564927 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 352, 'learning_rate': 0.0068139861285801805, 'gamma': 0.6677106024702967}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1027.62 sec; lr: 0.000403; Average loss: 8.12, Train accuracy: 0.5816, Val accuracy: 0.5120
True False             True True
Epoch: 0; 34.59 sec; lr: 0.006746; Average loss: 279.69, Train accuracy: 0.5513, Val accuracy: 0.4980
Epoch: 1; 68.93 sec; lr: 0.006746; Average loss: 190.77, Train accuracy: 0.5128, Val accuracy: 0.4960
Epoch: 2; 103.22 sec; lr: 0.006746; Average loss: 47.42, Train accuracy: 0.5816, Val accuracy: 0.5301
Epoch: 3; 137.53 sec; lr: 0.004498; Average loss: 39.17, Train accuracy: 0.5831, Val accuracy: 0.5181
Epoch: 4; 171.83 sec; lr: 0.004498; Average loss: 33.29, Train accuracy: 0.5599, Val accuracy: 0.4980
True True             False True
Epoch: 5; 206.13 sec; lr: 0.004498; Average loss: 56.41, Train accuracy: 0.5455, Val accuracy: 0.5060
False True             True False
Epoch: 6; 240.47 sec; lr: 0.004498; Average loss: 40.02, Train accuracy: 0.5748, Val accuracy: 0.4980
True False             True True
Epoch: 7; 274.77 sec; lr: 0.002999; Average loss: 

[32m[I 2022-11-07 09:14:31,756][0m Trial 62 finished with value: 0.5156459170013387 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 352, 'learning_rate': 0.006745717310054074, 'gamma': 0.6667577886901094}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1030.13 sec; lr: 0.000395; Average loss: 7.86, Train accuracy: 0.5832, Val accuracy: 0.5080
False True             False True
Epoch: 0; 34.59 sec; lr: 0.007476; Average loss: 147.25, Train accuracy: 0.5470, Val accuracy: 0.5000
Epoch: 1; 69.08 sec; lr: 0.007476; Average loss: 75.79, Train accuracy: 0.5430, Val accuracy: 0.4980
Epoch: 2; 103.59 sec; lr: 0.007476; Average loss: 75.69, Train accuracy: 0.5352, Val accuracy: 0.5000
Epoch: 3; 138.03 sec; lr: 0.004914; Average loss: 63.90, Train accuracy: 0.5496, Val accuracy: 0.4900
Epoch: 4; 172.49 sec; lr: 0.004914; Average loss: 50.66, Train accuracy: 0.5645, Val accuracy: 0.5020
False True             False True
Epoch: 5; 207.02 sec; lr: 0.004914; Average loss: 28.38, Train accuracy: 0.5922, Val accuracy: 0.4960
True False             True False
Epoch: 6; 241.48 sec; lr: 0.004914; Average loss: 40.48, Train accuracy: 0.5564, Val accuracy: 0.4940
True True             False True
Epoch: 7; 275.92 sec; lr: 0.003229; Average loss:

[32m[I 2022-11-07 09:31:49,867][0m Trial 63 finished with value: 0.5211680053547523 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 352, 'learning_rate': 0.007476477570056466, 'gamma': 0.6572037510609827}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1035.51 sec; lr: 0.000396; Average loss: 4.00, Train accuracy: 0.6070, Val accuracy: 0.5221
True False             True True
Epoch: 0; 34.86 sec; lr: 0.005701; Average loss: 371.31, Train accuracy: 0.5392, Val accuracy: 0.5000
Epoch: 1; 69.45 sec; lr: 0.005701; Average loss: 100.84, Train accuracy: 0.5390, Val accuracy: 0.4940
Epoch: 2; 104.13 sec; lr: 0.005701; Average loss: 41.01, Train accuracy: 0.5489, Val accuracy: 0.4960
Epoch: 3; 138.72 sec; lr: 0.003724; Average loss: 32.08, Train accuracy: 0.5727, Val accuracy: 0.5000
Epoch: 4; 173.32 sec; lr: 0.003724; Average loss: 47.01, Train accuracy: 0.5525, Val accuracy: 0.5261
False False             False True
Epoch: 5; 207.99 sec; lr: 0.003724; Average loss: 36.21, Train accuracy: 0.5501, Val accuracy: 0.5080
True False             False False
Epoch: 6; 242.65 sec; lr: 0.003724; Average loss: 16.39, Train accuracy: 0.5951, Val accuracy: 0.5241
False True             False False
Epoch: 7; 277.27 sec; lr: 0.002432; Average l

[32m[I 2022-11-07 09:49:11,865][0m Trial 64 finished with value: 0.49871151271753683 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.005700557115997103, 'gamma': 0.6531936603501893}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1039.39 sec; lr: 0.000289; Average loss: 4.81, Train accuracy: 0.5827, Val accuracy: 0.4699
True True             True False
Epoch: 0; 34.85 sec; lr: 0.007644; Average loss: 1014.78, Train accuracy: 0.5236, Val accuracy: 0.5000
Epoch: 1; 69.54 sec; lr: 0.007644; Average loss: 342.93, Train accuracy: 0.5483, Val accuracy: 0.5000
Epoch: 2; 104.24 sec; lr: 0.007644; Average loss: 269.85, Train accuracy: 0.5217, Val accuracy: 0.5000
Epoch: 3; 139.02 sec; lr: 0.005122; Average loss: 66.25, Train accuracy: 0.5477, Val accuracy: 0.5000


[32m[I 2022-11-07 09:52:08,067][0m Trial 65 finished with value: 0.3928212851405622 and parameters: {'layer_count': 202, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.007644329499377031, 'gamma': 0.6700857571304121}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 173.75 sec; lr: 0.005122; Average loss: 195.93, Train accuracy: 0.5080, Val accuracy: 0.4900
True True             True True
pruned


[32m[I 2022-11-07 09:52:11,393][0m Trial 66 finished with value: 0.0 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 672, 'learning_rate': 0.007063229903114826, 'gamma': 0.7011301650990266}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 964.00 MiB (GPU 0; 8.00 GiB total capacity; 4.49 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 34.61 sec; lr: 0.006150; Average loss: 99.39, Train accuracy: 0.5722, Val accuracy: 0.5382
Epoch: 1; 69.15 sec; lr: 0.006150; Average loss: 60.64, Train accuracy: 0.5809, Val accuracy: 0.4920
Epoch: 2; 103.70 sec; lr: 0.006150; Average loss: 53.19, Train accuracy: 0.5676, Val accuracy: 0.5000
Epoch: 3; 138.27 sec; lr: 0.003467; Average loss: 57.05, Train accuracy: 0.5794, Val accuracy: 0.5000
Epoch: 4; 172.89 sec; lr: 0.003467; Average loss: 56.73, Train accuracy: 0.5930, Val accuracy: 0.4900
True True             False True
Epoch: 5; 207.51 sec; lr: 0.003467; Average loss: 23.03, Train accuracy: 0.5925, Val accuracy: 0.5000
False True        

[32m[I 2022-11-07 10:09:33,179][0m Trial 67 finished with value: 0.5088688085676037 and parameters: {'layer_count': 210, 'step_size': 4, 'batch_size': 96, 'learning_rate': 0.006150377278704876, 'gamma': 0.5637302533104723}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1039.27 sec; lr: 0.000111; Average loss: 4.09, Train accuracy: 0.6113, Val accuracy: 0.4980
True False             True True
Epoch: 0; 34.91 sec; lr: 0.005266; Average loss: 254.14, Train accuracy: 0.5462, Val accuracy: 0.4940
Epoch: 1; 69.70 sec; lr: 0.005266; Average loss: 20.23, Train accuracy: 0.5929, Val accuracy: 0.5000
Epoch: 2; 104.36 sec; lr: 0.005266; Average loss: 90.73, Train accuracy: 0.5509, Val accuracy: 0.5000
Epoch: 3; 139.00 sec; lr: 0.003418; Average loss: 85.32, Train accuracy: 0.5477, Val accuracy: 0.5000
Epoch: 4; 173.62 sec; lr: 0.003418; Average loss: 78.22, Train accuracy: 0.5256, Val accuracy: 0.5100
False True             True False
Epoch: 5; 208.23 sec; lr: 0.003418; Average loss: 24.77, Train accuracy: 0.5698, Val accuracy: 0.5020
True False             True True
Epoch: 6; 242.91 sec; lr: 0.003418; Average loss: 20.66, Train accuracy: 0.5782, Val accuracy: 0.5120
False True             False True
Epoch: 7; 277.52 sec; lr: 0.002218; Average loss: 

[32m[I 2022-11-07 10:26:53,862][0m Trial 68 finished with value: 0.5142570281124499 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 352, 'learning_rate': 0.005266070384372859, 'gamma': 0.6490565743436029}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1038.01 sec; lr: 0.000256; Average loss: 3.90, Train accuracy: 0.6105, Val accuracy: 0.5181
False False             True False
Epoch: 0; 34.64 sec; lr: 0.006639; Average loss: 352.99, Train accuracy: 0.5571, Val accuracy: 0.5020
Epoch: 1; 69.08 sec; lr: 0.006639; Average loss: 140.39, Train accuracy: 0.5331, Val accuracy: 0.5301
Epoch: 2; 103.50 sec; lr: 0.006639; Average loss: 54.80, Train accuracy: 0.5501, Val accuracy: 0.5422
Epoch: 3; 137.97 sec; lr: 0.003948; Average loss: 26.08, Train accuracy: 0.5715, Val accuracy: 0.5100
Epoch: 4; 172.40 sec; lr: 0.003948; Average loss: 50.02, Train accuracy: 0.5560, Val accuracy: 0.5040
True True             False False
Epoch: 5; 206.84 sec; lr: 0.003948; Average loss: 53.24, Train accuracy: 0.5540, Val accuracy: 0.4940
True True             True False
Epoch: 6; 241.31 sec; lr: 0.003948; Average loss: 47.75, Train accuracy: 0.5637, Val accuracy: 0.4960
False True             True True
Epoch: 7; 275.72 sec; lr: 0.002347; Average loss

[32m[I 2022-11-07 10:43:34,642][0m Trial 69 finished with value: 0.5295198033513364 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.006639134066059621, 'gamma': 0.5946174054570326}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 28; 998.38 sec; lr: 0.000174; Average loss: 8.20, Train accuracy: 0.5744, Val accuracy: 0.5361
True True             True True
pruned


[32m[I 2022-11-07 10:43:37,913][0m Trial 70 finished with value: 0.0 and parameters: {'layer_count': 206, 'step_size': 4, 'batch_size': 1696, 'learning_rate': 0.006636948698463459, 'gamma': 0.5992476741058606}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 880.00 MiB (GPU 0; 8.00 GiB total capacity; 4.62 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 34.65 sec; lr: 0.007229; Average loss: 342.78, Train accuracy: 0.5298, Val accuracy: 0.5161
Epoch: 1; 69.02 sec; lr: 0.007229; Average loss: 34.99, Train accuracy: 0.5730, Val accuracy: 0.4960
Epoch: 2; 103.39 sec; lr: 0.007229; Average loss: 49.81, Train accuracy: 0.5474, Val accuracy: 0.4980
Epoch: 3; 137.76 sec; lr: 0.004833; Average loss: 78.74, Train accuracy: 0.5482, Val accuracy: 0.5361
Epoch: 4; 172.20 sec; lr: 0.004833; Average loss: 19.59, Train accuracy: 0.6096, Val accuracy: 0.5020
True False             False True
Epoch: 5; 206.57 sec; lr: 0.004833; Average loss: 41.12, Train accuracy: 0.5597, Val accuracy: 0.5000
True True       

[32m[I 2022-11-07 10:50:33,270][0m Trial 71 finished with value: 0.4350819946452476 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.007229352876329516, 'gamma': 0.6684618842334794}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 11; 412.91 sec; lr: 0.002159; Average loss: 45.85, Train accuracy: 0.5541, Val accuracy: 0.4960
True True             True True
pruned
Epoch: 0; 34.84 sec; lr: 0.006340; Average loss: 760.55, Train accuracy: 0.5358, Val accuracy: 0.5040
Epoch: 1; 69.49 sec; lr: 0.006340; Average loss: 162.41, Train accuracy: 0.5681, Val accuracy: 0.5000
Epoch: 2; 104.18 sec; lr: 0.006340; Average loss: 92.42, Train accuracy: 0.5118, Val accuracy: 0.5000
Epoch: 3; 138.77 sec; lr: 0.004576; Average loss: 161.12, Train accuracy: 0.5502, Val accuracy: 0.5000


[32m[I 2022-11-07 10:53:29,102][0m Trial 72 finished with value: 0.3970381526104418 and parameters: {'layer_count': 202, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.006339532237881189, 'gamma': 0.7218970872016014}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 173.36 sec; lr: 0.004576; Average loss: 120.14, Train accuracy: 0.5279, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 34.72 sec; lr: 0.008169; Average loss: 423.75, Train accuracy: 0.5401, Val accuracy: 0.5000
Epoch: 1; 69.28 sec; lr: 0.008169; Average loss: 96.33, Train accuracy: 0.5461, Val accuracy: 0.5000
Epoch: 2; 103.83 sec; lr: 0.008169; Average loss: 51.08, Train accuracy: 0.5392, Val accuracy: 0.5000
Epoch: 3; 138.46 sec; lr: 0.004667; Average loss: 35.64, Train accuracy: 0.5337, Val accuracy: 0.5000


[32m[I 2022-11-07 10:56:24,417][0m Trial 73 finished with value: 0.3928212851405622 and parameters: {'layer_count': 201, 'step_size': 4, 'batch_size': 224, 'learning_rate': 0.008168844568472902, 'gamma': 0.5712995158631646}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 173.02 sec; lr: 0.004667; Average loss: 45.24, Train accuracy: 0.5208, Val accuracy: 0.4900
True True             True True
pruned
Epoch: 0; 34.51 sec; lr: 0.007544; Average loss: 372.82, Train accuracy: 0.5388, Val accuracy: 0.5120
Epoch: 1; 68.85 sec; lr: 0.007544; Average loss: 98.15, Train accuracy: 0.5454, Val accuracy: 0.5020
Epoch: 2; 103.22 sec; lr: 0.007544; Average loss: 63.23, Train accuracy: 0.5756, Val accuracy: 0.4960
Epoch: 3; 137.61 sec; lr: 0.003980; Average loss: 54.11, Train accuracy: 0.5540, Val accuracy: 0.5000
Epoch: 4; 172.04 sec; lr: 0.003980; Average loss: 59.89, Train accuracy: 0.5558, Val accuracy: 0.4940
True False             True True
Epoch: 5; 206.40 sec; lr: 0.003980; Average loss: 53.57, Train accuracy: 0.5666, Val accuracy: 0.5040
False True             False True
Epoch: 6; 240.77 sec; lr: 0.003980; Average loss: 43.31, Train accuracy: 0.5708, Val accuracy: 0.5382
False False             True False
Epoch: 7; 275.12 sec; lr: 0.002100; Average 

[32m[I 2022-11-07 11:13:37,659][0m Trial 74 finished with value: 0.5225401606425703 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 352, 'learning_rate': 0.007544185233938366, 'gamma': 0.5276124304308145}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1030.80 sec; lr: 0.000086; Average loss: 5.49, Train accuracy: 0.5989, Val accuracy: 0.5281
False False             True False


[32m[I 2022-11-07 11:13:41,055][0m Trial 75 finished with value: 0.0 and parameters: {'layer_count': 204, 'step_size': 6, 'batch_size': 2016, 'learning_rate': 0.007582971477337425, 'gamma': 0.5329145251334151}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 1.02 GiB (GPU 0; 8.00 GiB total capacity; 4.84 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 34.54 sec; lr: 0.006819; Average loss: 325.46, Train accuracy: 0.5434, Val accuracy: 0.4960
Epoch: 1; 68.93 sec; lr: 0.006819; Average loss: 31.28, Train accuracy: 0.5953, Val accuracy: 0.4859
Epoch: 2; 103.22 sec; lr: 0.006819; Average loss: 121.66, Train accuracy: 0.5677, Val accuracy: 0.4980
Epoch: 3; 137.56 sec; lr: 0.003588; Average loss: 117.47, Train accuracy: 0.5519, Val accuracy: 0.5221
Epoch: 4; 171.91 sec; lr: 0.003588; Average loss: 17.41, Train accuracy: 0.6251, Val accuracy: 0.5060
True False             False True
Epoch: 5; 206.26 sec; lr: 0.003588; Average loss: 28.85, Train accuracy: 0.5704, Val accuracy: 0.5000
True True       

[32m[I 2022-11-07 11:31:28,742][0m Trial 76 finished with value: 0.5111780455153949 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 352, 'learning_rate': 0.006819036193677648, 'gamma': 0.5261154662891004}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1065.29 sec; lr: 0.000076; Average loss: 5.55, Train accuracy: 0.6059, Val accuracy: 0.5020
True False             True False
Epoch: 0; 33.84 sec; lr: 0.007873; Average loss: 98.98, Train accuracy: 0.5488, Val accuracy: 0.5000
Epoch: 1; 67.63 sec; lr: 0.007873; Average loss: 60.88, Train accuracy: 0.5625, Val accuracy: 0.5120
Epoch: 2; 101.43 sec; lr: 0.007873; Average loss: 31.12, Train accuracy: 0.5627, Val accuracy: 0.4920
Epoch: 3; 135.21 sec; lr: 0.004686; Average loss: 56.75, Train accuracy: 0.5696, Val accuracy: 0.4960
Epoch: 4; 169.02 sec; lr: 0.004686; Average loss: 27.08, Train accuracy: 0.5782, Val accuracy: 0.5120
False False             True False
Epoch: 5; 202.84 sec; lr: 0.004686; Average loss: 13.10, Train accuracy: 0.5776, Val accuracy: 0.5000
True False             False True
Epoch: 6; 236.68 sec; lr: 0.004686; Average loss: 24.86, Train accuracy: 0.5774, Val accuracy: 0.5020
False True             False False
Epoch: 7; 270.50 sec; lr: 0.002789; Average los

[32m[I 2022-11-07 11:48:27,669][0m Trial 77 finished with value: 0.5197623828647925 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 160, 'learning_rate': 0.007872525832556109, 'gamma': 0.5952356569239533}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1016.59 sec; lr: 0.000208; Average loss: 1.76, Train accuracy: 0.6041, Val accuracy: 0.5361
False False             False False
Epoch: 0; 34.26 sec; lr: 0.007871; Average loss: 382.04, Train accuracy: 0.5502, Val accuracy: 0.5000
Epoch: 1; 68.28 sec; lr: 0.007871; Average loss: 84.55, Train accuracy: 0.5573, Val accuracy: 0.5000
Epoch: 2; 102.33 sec; lr: 0.007871; Average loss: 51.56, Train accuracy: 0.5432, Val accuracy: 0.5000
Epoch: 3; 136.39 sec; lr: 0.004218; Average loss: 54.16, Train accuracy: 0.5565, Val accuracy: 0.5000


[32m[I 2022-11-07 11:51:20,534][0m Trial 78 finished with value: 0.3946285140562249 and parameters: {'layer_count': 200, 'step_size': 4, 'batch_size': 160, 'learning_rate': 0.007870750498377089, 'gamma': 0.5359071047883067}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 170.47 sec; lr: 0.004218; Average loss: 37.75, Train accuracy: 0.5363, Val accuracy: 0.4960
True True             True True
pruned
Epoch: 0; 34.27 sec; lr: 0.008504; Average loss: 117.90, Train accuracy: 0.5783, Val accuracy: 0.5120
Epoch: 1; 68.46 sec; lr: 0.008504; Average loss: 42.95, Train accuracy: 0.5763, Val accuracy: 0.5040
Epoch: 2; 102.61 sec; lr: 0.008504; Average loss: 33.91, Train accuracy: 0.5808, Val accuracy: 0.5000
Epoch: 3; 136.77 sec; lr: 0.004375; Average loss: 30.49, Train accuracy: 0.5840, Val accuracy: 0.5000


[32m[I 2022-11-07 11:54:13,759][0m Trial 79 finished with value: 0.3984437751004016 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 96, 'learning_rate': 0.008504394956208219, 'gamma': 0.5143966314597428}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 170.91 sec; lr: 0.004375; Average loss: 34.68, Train accuracy: 0.5757, Val accuracy: 0.4960
True True             True True
pruned
Epoch: 0; 35.37 sec; lr: 0.008520; Average loss: 94.22, Train accuracy: 0.5779, Val accuracy: 0.4980
Epoch: 1; 70.53 sec; lr: 0.008520; Average loss: 56.10, Train accuracy: 0.5822, Val accuracy: 0.5000
Epoch: 2; 105.66 sec; lr: 0.008520; Average loss: 40.46, Train accuracy: 0.5735, Val accuracy: 0.4980
Epoch: 3; 140.84 sec; lr: 0.005044; Average loss: 23.11, Train accuracy: 0.5802, Val accuracy: 0.4960
Epoch: 4; 176.05 sec; lr: 0.005044; Average loss: 12.67, Train accuracy: 0.5809, Val accuracy: 0.4980
False True             True False
Epoch: 5; 211.24 sec; lr: 0.005044; Average loss: 11.98, Train accuracy: 0.5792, Val accuracy: 0.5181
False False             True True
Epoch: 6; 246.45 sec; lr: 0.005044; Average loss: 11.82, Train accuracy: 0.5795, Val accuracy: 0.4940
True False             False True
Epoch: 7; 281.67 sec; lr: 0.002986; Average l

[32m[I 2022-11-07 12:11:53,144][0m Trial 80 finished with value: 0.5287483266398929 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 32, 'learning_rate': 0.008519547760681595, 'gamma': 0.5920030918982755}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1057.05 sec; lr: 0.000217; Average loss: 0.88, Train accuracy: 0.6141, Val accuracy: 0.5482
False False             True False
Epoch: 0; 35.48 sec; lr: 0.008503; Average loss: 141.63, Train accuracy: 0.5522, Val accuracy: 0.5000
Epoch: 1; 70.95 sec; lr: 0.008503; Average loss: 28.04, Train accuracy: 0.5566, Val accuracy: 0.5000
Epoch: 2; 106.45 sec; lr: 0.008503; Average loss: 8.16, Train accuracy: 0.5549, Val accuracy: 0.5000
Epoch: 3; 141.90 sec; lr: 0.004992; Average loss: 5.92, Train accuracy: 0.5669, Val accuracy: 0.5000


[32m[I 2022-11-07 12:14:51,383][0m Trial 81 finished with value: 0.3958333333333333 and parameters: {'layer_count': 201, 'step_size': 4, 'batch_size': 32, 'learning_rate': 0.008502693817068982, 'gamma': 0.5871314077105064}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 177.39 sec; lr: 0.004992; Average loss: 3.51, Train accuracy: 0.5814, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 34.15 sec; lr: 0.008187; Average loss: 215.36, Train accuracy: 0.5617, Val accuracy: 0.5020
Epoch: 1; 68.26 sec; lr: 0.008187; Average loss: 65.24, Train accuracy: 0.5539, Val accuracy: 0.5060
Epoch: 2; 102.32 sec; lr: 0.008187; Average loss: 37.09, Train accuracy: 0.5750, Val accuracy: 0.5181
Epoch: 3; 136.41 sec; lr: 0.004973; Average loss: 45.59, Train accuracy: 0.5677, Val accuracy: 0.4960
Epoch: 4; 170.41 sec; lr: 0.004973; Average loss: 24.51, Train accuracy: 0.5810, Val accuracy: 0.4940
True True             False False
Epoch: 5; 204.40 sec; lr: 0.004973; Average loss: 27.57, Train accuracy: 0.5691, Val accuracy: 0.5020
False True             True False
Epoch: 6; 238.39 sec; lr: 0.004973; Average loss: 17.97, Train accuracy: 0.5933, Val accuracy: 0.5020
True False             True True
Epoch: 7; 272.38 sec; lr: 0.003020; Average lo

[32m[I 2022-11-07 12:30:09,931][0m Trial 82 finished with value: 0.50663208389112 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 224, 'learning_rate': 0.00818700512291993, 'gamma': 0.607390358847814}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 26; 917.69 sec; lr: 0.000411; Average loss: 2.79, Train accuracy: 0.5992, Val accuracy: 0.4940
True True             True True
pruned
Epoch: 0; 35.00 sec; lr: 0.007426; Average loss: 79.53, Train accuracy: 0.5707, Val accuracy: 0.5000
Epoch: 1; 70.04 sec; lr: 0.007426; Average loss: 81.25, Train accuracy: 0.5736, Val accuracy: 0.4980
Epoch: 2; 105.11 sec; lr: 0.007426; Average loss: 54.04, Train accuracy: 0.5872, Val accuracy: 0.4980
Epoch: 3; 140.21 sec; lr: 0.004681; Average loss: 44.55, Train accuracy: 0.5879, Val accuracy: 0.4980


[32m[I 2022-11-07 12:33:06,131][0m Trial 83 finished with value: 0.394929718875502 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 32, 'learning_rate': 0.007425624732741424, 'gamma': 0.630391903845249}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 175.32 sec; lr: 0.004681; Average loss: 29.62, Train accuracy: 0.5945, Val accuracy: 0.4980
True True             True True
pruned
Epoch: 0; 34.28 sec; lr: 0.007760; Average loss: 1155.76, Train accuracy: 0.5336, Val accuracy: 0.5000
Epoch: 1; 68.55 sec; lr: 0.007760; Average loss: 378.40, Train accuracy: 0.5123, Val accuracy: 0.5000
Epoch: 2; 102.71 sec; lr: 0.007760; Average loss: 120.96, Train accuracy: 0.5580, Val accuracy: 0.5000
Epoch: 3; 136.93 sec; lr: 0.004237; Average loss: 75.57, Train accuracy: 0.5464, Val accuracy: 0.5000


[32m[I 2022-11-07 12:35:58,005][0m Trial 84 finished with value: 0.3958333333333333 and parameters: {'layer_count': 202, 'step_size': 4, 'batch_size': 480, 'learning_rate': 0.007759665308996548, 'gamma': 0.5460626698976794}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 171.04 sec; lr: 0.004237; Average loss: 32.82, Train accuracy: 0.5486, Val accuracy: 0.5000
True True             True True
pruned


[32m[I 2022-11-07 12:35:59,486][0m Trial 85 finished with value: 0.0 and parameters: {'layer_count': 200, 'step_size': 6, 'batch_size': 1312, 'learning_rate': 0.00933064465570787, 'gamma': 0.592547611952816}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 1.84 GiB (GPU 0; 8.00 GiB total capacity; 4.91 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.79 sec; lr: 0.007158; Average loss: 332.30, Train accuracy: 0.5229, Val accuracy: 0.5060
Epoch: 1; 67.54 sec; lr: 0.007158; Average loss: 64.31, Train accuracy: 0.5478, Val accuracy: 0.5120
Epoch: 2; 101.12 sec; lr: 0.007158; Average loss: 38.25, Train accuracy: 0.5749, Val accuracy: 0.4960
Epoch: 3; 134.78 sec; lr: 0.004104; Average loss: 87.52, Train accuracy: 0.5515, Val accuracy: 0.5341
Epoch: 4; 168.76 sec; lr: 0.004104; Average loss: 43.27, Train accuracy: 0.5704, Val accuracy: 0.5261
True False             True False
Epoch: 5; 202.99 sec; lr: 0.004104; Average loss: 20.11, Train accuracy: 0.5991, Val accuracy: 0.5000
True True         

[32m[I 2022-11-07 12:52:15,226][0m Trial 86 finished with value: 0.5264904445367677 and parameters: {'layer_count': 206, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.007157585667037785, 'gamma': 0.5733189862758883}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 28; 974.87 sec; lr: 0.000146; Average loss: 8.40, Train accuracy: 0.5767, Val accuracy: 0.5281
True True             True True
pruned
Epoch: 0; 33.71 sec; lr: 0.007218; Average loss: 332.07, Train accuracy: 0.5248, Val accuracy: 0.5020
Epoch: 1; 67.23 sec; lr: 0.007218; Average loss: 57.54, Train accuracy: 0.5507, Val accuracy: 0.4980
Epoch: 2; 100.77 sec; lr: 0.007218; Average loss: 58.55, Train accuracy: 0.5529, Val accuracy: 0.4980
Epoch: 3; 134.37 sec; lr: 0.004066; Average loss: 76.35, Train accuracy: 0.5370, Val accuracy: 0.5281
Epoch: 4; 167.93 sec; lr: 0.004066; Average loss: 25.46, Train accuracy: 0.5906, Val accuracy: 0.4900
True False             True True
Epoch: 5; 201.47 sec; lr: 0.004066; Average loss: 64.52, Train accuracy: 0.5375, Val accuracy: 0.4880
True True             False True
Epoch: 6; 235.03 sec; lr: 0.004066; Average loss: 65.54, Train accuracy: 0.5265, Val accuracy: 0.5000
False True             True False
Epoch: 7; 268.57 sec; lr: 0.002290; Average lo

[32m[I 2022-11-07 13:09:04,362][0m Trial 87 finished with value: 0.524330655957162 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.007218157617173712, 'gamma': 0.5632569468092002}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1007.11 sec; lr: 0.000130; Average loss: 6.40, Train accuracy: 0.6023, Val accuracy: 0.5341
False False             True True
Epoch: 0; 33.72 sec; lr: 0.007159; Average loss: 358.74, Train accuracy: 0.5499, Val accuracy: 0.4839
Epoch: 1; 67.32 sec; lr: 0.007159; Average loss: 78.12, Train accuracy: 0.5645, Val accuracy: 0.5120
Epoch: 2; 100.87 sec; lr: 0.007159; Average loss: 60.17, Train accuracy: 0.5418, Val accuracy: 0.4960
Epoch: 3; 134.43 sec; lr: 0.004145; Average loss: 70.23, Train accuracy: 0.5363, Val accuracy: 0.5060
Epoch: 4; 168.09 sec; lr: 0.004145; Average loss: 35.30, Train accuracy: 0.5729, Val accuracy: 0.5482
False False             True False
Epoch: 5; 201.67 sec; lr: 0.004145; Average loss: 16.60, Train accuracy: 0.5942, Val accuracy: 0.5080
True False             False True
Epoch: 6; 235.24 sec; lr: 0.004145; Average loss: 29.35, Train accuracy: 0.5725, Val accuracy: 0.4980
True True             False False
Epoch: 7; 268.80 sec; lr: 0.002400; Average los

[32m[I 2022-11-07 13:23:19,840][0m Trial 88 finished with value: 0.49976907630522094 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.007159229663293778, 'gamma': 0.5789618570968862}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 24; 853.42 sec; lr: 0.000270; Average loss: 6.90, Train accuracy: 0.6060, Val accuracy: 0.5201
True True             True True
pruned
Epoch: 0; 35.11 sec; lr: 0.006908; Average loss: 436.75, Train accuracy: 0.5428, Val accuracy: 0.4980
Epoch: 1; 70.09 sec; lr: 0.006908; Average loss: 134.98, Train accuracy: 0.5405, Val accuracy: 0.5161
Epoch: 2; 105.31 sec; lr: 0.006908; Average loss: 65.48, Train accuracy: 0.5478, Val accuracy: 0.4980
Epoch: 3; 140.55 sec; lr: 0.003917; Average loss: 53.31, Train accuracy: 0.5668, Val accuracy: 0.5000
Epoch: 4; 175.88 sec; lr: 0.003917; Average loss: 63.15, Train accuracy: 0.5489, Val accuracy: 0.5361
False False             True False
Epoch: 5; 211.10 sec; lr: 0.003917; Average loss: 39.93, Train accuracy: 0.5694, Val accuracy: 0.5020
True False             False True
Epoch: 6; 246.48 sec; lr: 0.003917; Average loss: 40.58, Train accuracy: 0.5423, Val accuracy: 0.5321
False True             False False
Epoch: 7; 281.69 sec; lr: 0.002222; Avera

[32m[I 2022-11-07 13:34:23,937][0m Trial 89 finished with value: 0.482781124497992 and parameters: {'layer_count': 206, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.006907634445825945, 'gamma': 0.567108029190434}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 18; 661.89 sec; lr: 0.000714; Average loss: 8.73, Train accuracy: 0.5975, Val accuracy: 0.5341
True True             True True
pruned
Epoch: 0; 34.08 sec; lr: 0.006660; Average loss: 316.59, Train accuracy: 0.5354, Val accuracy: 0.5100
Epoch: 1; 67.85 sec; lr: 0.006660; Average loss: 27.87, Train accuracy: 0.5768, Val accuracy: 0.5120
Epoch: 2; 101.63 sec; lr: 0.006660; Average loss: 43.72, Train accuracy: 0.5603, Val accuracy: 0.5020
Epoch: 3; 135.41 sec; lr: 0.003358; Average loss: 121.52, Train accuracy: 0.5520, Val accuracy: 0.4980
Epoch: 4; 169.18 sec; lr: 0.003358; Average loss: 104.37, Train accuracy: 0.5280, Val accuracy: 0.5161
False True             True False
Epoch: 5; 202.99 sec; lr: 0.003358; Average loss: 19.73, Train accuracy: 0.6106, Val accuracy: 0.5120
True False             True True
Epoch: 6; 236.71 sec; lr: 0.003358; Average loss: 21.30, Train accuracy: 0.5846, Val accuracy: 0.5020
True True             False True
Epoch: 7; 270.45 sec; lr: 0.001693; Average 

[32m[I 2022-11-07 13:51:19,073][0m Trial 90 finished with value: 0.5205823293172691 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 352, 'learning_rate': 0.006659783009551133, 'gamma': 0.5041841402026228}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1013.06 sec; lr: 0.000055; Average loss: 5.38, Train accuracy: 0.6061, Val accuracy: 0.5201
False False             True False


[32m[I 2022-11-07 13:51:22,043][0m Trial 91 finished with value: 0.0 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 672, 'learning_rate': 0.005996783935782761, 'gamma': 0.5012868619514188}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 964.00 MiB (GPU 0; 8.00 GiB total capacity; 4.49 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.79 sec; lr: 0.008691; Average loss: 109.11, Train accuracy: 0.5470, Val accuracy: 0.5000
Epoch: 1; 67.46 sec; lr: 0.008691; Average loss: 105.45, Train accuracy: 0.5393, Val accuracy: 0.4940
Epoch: 2; 101.21 sec; lr: 0.008691; Average loss: 68.27, Train accuracy: 0.5626, Val accuracy: 0.5000
Epoch: 3; 134.91 sec; lr: 0.004489; Average loss: 149.09, Train accuracy: 0.5193, Val accuracy: 0.5000
Epoch: 4; 168.61 sec; lr: 0.004489; Average loss: 105.59, Train accuracy: 0.5501, Val accuracy: 0.4980
True True             False True
Epoch: 5; 202.32 sec; lr: 0.004489; Average loss: 69.02, Train accuracy: 0.5471, Val accuracy: 0.5181
False True    

[32m[I 2022-11-07 13:57:01,196][0m Trial 92 finished with value: 0.42228915662650607 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.00869088243054087, 'gamma': 0.5165640141066843}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 9; 337.15 sec; lr: 0.002319; Average loss: 26.49, Train accuracy: 0.5632, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 33.90 sec; lr: 0.006675; Average loss: 228.84, Train accuracy: 0.5553, Val accuracy: 0.5221
Epoch: 1; 67.60 sec; lr: 0.006675; Average loss: 33.85, Train accuracy: 0.5695, Val accuracy: 0.5141
Epoch: 2; 101.33 sec; lr: 0.006675; Average loss: 70.69, Train accuracy: 0.5608, Val accuracy: 0.5181
Epoch: 3; 135.07 sec; lr: 0.003610; Average loss: 21.93, Train accuracy: 0.5932, Val accuracy: 0.5000
Epoch: 4; 168.78 sec; lr: 0.003610; Average loss: 75.45, Train accuracy: 0.5496, Val accuracy: 0.5000
True True             False True
Epoch: 5; 202.66 sec; lr: 0.003610; Average loss: 79.19, Train accuracy: 0.5404, Val accuracy: 0.5000
True True             True False
Epoch: 6; 236.53 sec; lr: 0.003610; Average loss: 88.17, Train accuracy: 0.5312, Val accuracy: 0.5040
False True             True True
Epoch: 7; 270.44 sec; lr: 0.001952; Average los

[32m[I 2022-11-07 14:13:54,398][0m Trial 93 finished with value: 0.5157797858099062 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 352, 'learning_rate': 0.0066748885382981295, 'gamma': 0.5408251118598432}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1011.10 sec; lr: 0.000090; Average loss: 6.70, Train accuracy: 0.6063, Val accuracy: 0.5020
True False             True True
Epoch: 0; 33.74 sec; lr: 0.007305; Average loss: 410.07, Train accuracy: 0.5572, Val accuracy: 0.4980
Epoch: 1; 67.41 sec; lr: 0.007305; Average loss: 191.59, Train accuracy: 0.5239, Val accuracy: 0.4960
Epoch: 2; 100.94 sec; lr: 0.007305; Average loss: 119.23, Train accuracy: 0.5262, Val accuracy: 0.5100
Epoch: 3; 134.53 sec; lr: 0.003805; Average loss: 26.20, Train accuracy: 0.6125, Val accuracy: 0.5141
Epoch: 4; 168.15 sec; lr: 0.003805; Average loss: 33.77, Train accuracy: 0.5650, Val accuracy: 0.5321
False False             False True
Epoch: 5; 201.72 sec; lr: 0.003805; Average loss: 17.23, Train accuracy: 0.5884, Val accuracy: 0.5221
True False             False False
Epoch: 6; 235.27 sec; lr: 0.003805; Average loss: 34.73, Train accuracy: 0.5447, Val accuracy: 0.5100
True True             False False
Epoch: 7; 268.90 sec; lr: 0.001982; Average l

[32m[I 2022-11-07 14:30:44,531][0m Trial 94 finished with value: 0.5177376171352075 and parameters: {'layer_count': 206, 'step_size': 4, 'batch_size': 480, 'learning_rate': 0.007304800839777434, 'gamma': 0.5209007537042507}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1008.02 sec; lr: 0.000076; Average loss: 5.95, Train accuracy: 0.5990, Val accuracy: 0.5201
True False             True True
Epoch: 0; 33.86 sec; lr: 0.006291; Average loss: 295.33, Train accuracy: 0.5468, Val accuracy: 0.4960
Epoch: 1; 67.66 sec; lr: 0.006291; Average loss: 36.13, Train accuracy: 0.5852, Val accuracy: 0.4980
Epoch: 2; 101.38 sec; lr: 0.006291; Average loss: 38.41, Train accuracy: 0.5749, Val accuracy: 0.4960
Epoch: 3; 135.10 sec; lr: 0.003437; Average loss: 33.57, Train accuracy: 0.5721, Val accuracy: 0.5281
Epoch: 4; 168.84 sec; lr: 0.003437; Average loss: 20.23, Train accuracy: 0.5718, Val accuracy: 0.4940
True False             True False
Epoch: 5; 202.57 sec; lr: 0.003437; Average loss: 41.47, Train accuracy: 0.5514, Val accuracy: 0.5321
False True             False True
Epoch: 6; 236.30 sec; lr: 0.003437; Average loss: 12.06, Train accuracy: 0.5888, Val accuracy: 0.5060
True False             True False
Epoch: 7; 270.16 sec; lr: 0.001878; Average loss:

[32m[I 2022-11-07 14:45:57,974][0m Trial 95 finished with value: 0.5139390896921018 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 352, 'learning_rate': 0.006290932435092677, 'gamma': 0.5464119488079436}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 26; 911.36 sec; lr: 0.000167; Average loss: 4.21, Train accuracy: 0.5925, Val accuracy: 0.5382
True True             True True
pruned
Epoch: 0; 33.91 sec; lr: 0.005718; Average loss: 241.92, Train accuracy: 0.5437, Val accuracy: 0.5000
Epoch: 1; 67.65 sec; lr: 0.005718; Average loss: 149.17, Train accuracy: 0.5499, Val accuracy: 0.5000
Epoch: 2; 101.43 sec; lr: 0.005718; Average loss: 56.82, Train accuracy: 0.5425, Val accuracy: 0.5000
Epoch: 3; 135.21 sec; lr: 0.003256; Average loss: 65.04, Train accuracy: 0.5514, Val accuracy: 0.5000


[32m[I 2022-11-07 14:48:49,105][0m Trial 96 finished with value: 0.3958333333333333 and parameters: {'layer_count': 202, 'step_size': 4, 'batch_size': 224, 'learning_rate': 0.005718417523822279, 'gamma': 0.5694125562917509}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 169.11 sec; lr: 0.003256; Average loss: 46.57, Train accuracy: 0.5490, Val accuracy: 0.5000
True True             True True
pruned


[32m[I 2022-11-07 14:48:51,761][0m Trial 97 finished with value: 0.0 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 736, 'learning_rate': 0.009988473903234813, 'gamma': 0.6921617131481058}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 1.03 GiB (GPU 0; 8.00 GiB total capacity; 4.51 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.93 sec; lr: 0.007147; Average loss: 1036.10, Train accuracy: 0.5314, Val accuracy: 0.5000
Epoch: 1; 67.68 sec; lr: 0.007147; Average loss: 162.32, Train accuracy: 0.5091, Val accuracy: 0.5000
Epoch: 2; 101.52 sec; lr: 0.007147; Average loss: 78.33, Train accuracy: 0.5455, Val accuracy: 0.4900
Epoch: 3; 135.29 sec; lr: 0.003974; Average loss: 144.90, Train accuracy: 0.5369, Val accuracy: 0.5000
Epoch: 4; 169.06 sec; lr: 0.003974; Average loss: 110.44, Train accuracy: 0.5337, Val accuracy: 0.5000
True False             True True
Epoch: 5; 202.91 sec; lr: 0.003974; Average loss: 83.69, Train accuracy: 0.5461, Val accuracy: 0.5000
True True      

[32m[I 2022-11-07 14:53:24,283][0m Trial 98 finished with value: 0.408019578313253 and parameters: {'layer_count': 199, 'step_size': 4, 'batch_size': 480, 'learning_rate': 0.0071472688210041544, 'gamma': 0.5560486043129862}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 7; 270.49 sec; lr: 0.002210; Average loss: 33.87, Train accuracy: 0.5299, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 33.79 sec; lr: 0.007516; Average loss: 252.15, Train accuracy: 0.5644, Val accuracy: 0.5020
Epoch: 1; 67.34 sec; lr: 0.007516; Average loss: 85.88, Train accuracy: 0.5590, Val accuracy: 0.4980
Epoch: 2; 100.89 sec; lr: 0.007516; Average loss: 94.72, Train accuracy: 0.5546, Val accuracy: 0.5241
Epoch: 3; 134.43 sec; lr: 0.004569; Average loss: 61.49, Train accuracy: 0.5768, Val accuracy: 0.4940
Epoch: 4; 168.00 sec; lr: 0.004569; Average loss: 51.71, Train accuracy: 0.5803, Val accuracy: 0.5060
False True             False True
Epoch: 5; 201.55 sec; lr: 0.004569; Average loss: 26.87, Train accuracy: 0.5918, Val accuracy: 0.5000
True False             True False
Epoch: 6; 235.18 sec; lr: 0.004569; Average loss: 40.64, Train accuracy: 0.5692, Val accuracy: 0.5000
True True             False True
Epoch: 7; 268.73 sec; lr: 0.002777; Average l

[32m[I 2022-11-07 15:01:16,524][0m Trial 99 finished with value: 0.4403614457831325 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 288, 'learning_rate': 0.007516479963786418, 'gamma': 0.607835969360948}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 13; 470.21 sec; lr: 0.001688; Average loss: 10.67, Train accuracy: 0.5800, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 33.70 sec; lr: 0.006471; Average loss: 409.09, Train accuracy: 0.5422, Val accuracy: 0.4920
Epoch: 1; 67.19 sec; lr: 0.006471; Average loss: 110.25, Train accuracy: 0.5425, Val accuracy: 0.5020
Epoch: 2; 100.78 sec; lr: 0.006471; Average loss: 72.98, Train accuracy: 0.5255, Val accuracy: 0.5321
Epoch: 3; 134.29 sec; lr: 0.003717; Average loss: 26.05, Train accuracy: 0.5907, Val accuracy: 0.4980
Epoch: 4; 167.79 sec; lr: 0.003717; Average loss: 40.58, Train accuracy: 0.5519, Val accuracy: 0.5040
False True             False False
Epoch: 5; 201.39 sec; lr: 0.003717; Average loss: 37.43, Train accuracy: 0.5336, Val accuracy: 0.4960
True False             True False
Epoch: 6; 234.88 sec; lr: 0.003717; Average loss: 37.51, Train accuracy: 0.5328, Val accuracy: 0.5120
False True             False True
Epoch: 7; 268.38 sec; lr: 0.002135; Avera

[32m[I 2022-11-07 15:10:48,784][0m Trial 100 finished with value: 0.46659284195605955 and parameters: {'layer_count': 208, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.006471394125546257, 'gamma': 0.5743509643756165}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 16; 570.19 sec; lr: 0.000704; Average loss: 10.58, Train accuracy: 0.5912, Val accuracy: 0.5141
True True             True True
pruned
Epoch: 0; 33.87 sec; lr: 0.007944; Average loss: 231.83, Train accuracy: 0.5564, Val accuracy: 0.5281
Epoch: 1; 67.49 sec; lr: 0.007944; Average loss: 19.77, Train accuracy: 0.5766, Val accuracy: 0.5000
Epoch: 2; 101.08 sec; lr: 0.007944; Average loss: 50.12, Train accuracy: 0.5679, Val accuracy: 0.4980
Epoch: 3; 134.76 sec; lr: 0.004681; Average loss: 24.70, Train accuracy: 0.5773, Val accuracy: 0.5020
Epoch: 4; 168.47 sec; lr: 0.004681; Average loss: 34.96, Train accuracy: 0.5731, Val accuracy: 0.4960
True False             True True
Epoch: 5; 202.15 sec; lr: 0.004681; Average loss: 22.51, Train accuracy: 0.5790, Val accuracy: 0.5100
False True             False True
Epoch: 6; 235.87 sec; lr: 0.004681; Average loss: 14.60, Train accuracy: 0.5748, Val accuracy: 0.5000
True False             True False
Epoch: 7; 269.72 sec; lr: 0.002758; Average 

[32m[I 2022-11-07 15:27:51,446][0m Trial 101 finished with value: 0.5090194109772423 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 160, 'learning_rate': 0.007944341940747059, 'gamma': 0.5891691884619031}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1020.59 sec; lr: 0.000196; Average loss: 1.80, Train accuracy: 0.6067, Val accuracy: 0.5060
True False             True False
Epoch: 0; 34.49 sec; lr: 0.006832; Average loss: 227.82, Train accuracy: 0.5440, Val accuracy: 0.5000
Epoch: 1; 68.91 sec; lr: 0.006832; Average loss: 129.33, Train accuracy: 0.5579, Val accuracy: 0.5000
Epoch: 2; 103.42 sec; lr: 0.006832; Average loss: 38.28, Train accuracy: 0.5405, Val accuracy: 0.5000
Epoch: 3; 137.75 sec; lr: 0.003472; Average loss: 19.82, Train accuracy: 0.5476, Val accuracy: 0.5000


[32m[I 2022-11-07 15:30:45,742][0m Trial 102 finished with value: 0.3958333333333333 and parameters: {'layer_count': 202, 'step_size': 4, 'batch_size': 96, 'learning_rate': 0.006832489842163553, 'gamma': 0.5081032715342527}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 172.10 sec; lr: 0.003472; Average loss: 16.64, Train accuracy: 0.5578, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 34.04 sec; lr: 0.008312; Average loss: 188.05, Train accuracy: 0.5717, Val accuracy: 0.4980
Epoch: 1; 67.92 sec; lr: 0.008312; Average loss: 43.38, Train accuracy: 0.5724, Val accuracy: 0.4980
Epoch: 2; 101.78 sec; lr: 0.008312; Average loss: 31.74, Train accuracy: 0.5670, Val accuracy: 0.5020
Epoch: 3; 135.63 sec; lr: 0.006262; Average loss: 47.09, Train accuracy: 0.5769, Val accuracy: 0.5040
Epoch: 4; 169.48 sec; lr: 0.006262; Average loss: 32.31, Train accuracy: 0.5833, Val accuracy: 0.5020
True False             False True
Epoch: 5; 203.29 sec; lr: 0.006262; Average loss: 17.81, Train accuracy: 0.5844, Val accuracy: 0.5040
False True             False False
Epoch: 6; 237.13 sec; lr: 0.006262; Average loss: 16.07, Train accuracy: 0.5810, Val accuracy: 0.5020
True False             True False
Epoch: 7; 270.97 sec; lr: 0.004717; Average

[32m[I 2022-11-07 15:47:43,154][0m Trial 103 finished with value: 0.5204317269076305 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 160, 'learning_rate': 0.008312479238707484, 'gamma': 0.7533329732530711}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1015.38 sec; lr: 0.001145; Average loss: 3.91, Train accuracy: 0.5868, Val accuracy: 0.5382
False True             True False
Epoch: 0; 34.28 sec; lr: 0.008313; Average loss: 459.87, Train accuracy: 0.5549, Val accuracy: 0.5000
Epoch: 1; 68.42 sec; lr: 0.008313; Average loss: 301.62, Train accuracy: 0.5141, Val accuracy: 0.5000
Epoch: 2; 102.58 sec; lr: 0.008313; Average loss: 42.97, Train accuracy: 0.5241, Val accuracy: 0.5000
Epoch: 3; 136.80 sec; lr: 0.006290; Average loss: 68.80, Train accuracy: 0.5250, Val accuracy: 0.5000


[32m[I 2022-11-07 15:50:36,377][0m Trial 104 finished with value: 0.3958333333333333 and parameters: {'layer_count': 201, 'step_size': 4, 'batch_size': 288, 'learning_rate': 0.008312771456345973, 'gamma': 0.7567068797865739}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 4; 170.97 sec; lr: 0.006290; Average loss: 44.65, Train accuracy: 0.5423, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 34.02 sec; lr: 0.008051; Average loss: 371.16, Train accuracy: 0.5328, Val accuracy: 0.4980
Epoch: 1; 67.93 sec; lr: 0.008051; Average loss: 66.71, Train accuracy: 0.5545, Val accuracy: 0.5040
Epoch: 2; 101.93 sec; lr: 0.008051; Average loss: 33.98, Train accuracy: 0.5875, Val accuracy: 0.4960
Epoch: 3; 135.84 sec; lr: 0.005787; Average loss: 136.78, Train accuracy: 0.5302, Val accuracy: 0.4980
Epoch: 4; 169.75 sec; lr: 0.005787; Average loss: 108.72, Train accuracy: 0.5229, Val accuracy: 0.5703
False False             True False
Epoch: 5; 203.66 sec; lr: 0.005787; Average loss: 30.49, Train accuracy: 0.5939, Val accuracy: 0.4960
True False             False True
Epoch: 6; 237.68 sec; lr: 0.005787; Average loss: 70.54, Train accuracy: 0.5547, Val accuracy: 0.4980
False True             False False
Epoch: 7; 271.61 sec; lr: 0.004160; Aver

[32m[I 2022-11-07 15:57:59,896][0m Trial 105 finished with value: 0.45032823602100713 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.008050850962618398, 'gamma': 0.7188364752082258}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 12; 441.34 sec; lr: 0.002990; Average loss: 17.56, Train accuracy: 0.5978, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 34.04 sec; lr: 0.008890; Average loss: 375.13, Train accuracy: 0.5447, Val accuracy: 0.5000
Epoch: 1; 67.99 sec; lr: 0.008890; Average loss: 166.33, Train accuracy: 0.5234, Val accuracy: 0.4980
Epoch: 2; 101.92 sec; lr: 0.008890; Average loss: 109.66, Train accuracy: 0.5512, Val accuracy: 0.4920
Epoch: 3; 135.83 sec; lr: 0.008890; Average loss: 58.36, Train accuracy: 0.5805, Val accuracy: 0.5442
Epoch: 4; 169.83 sec; lr: 0.008890; Average loss: 33.68, Train accuracy: 0.5741, Val accuracy: 0.5020
True False             True True
Epoch: 5; 203.78 sec; lr: 0.008302; Average loss: 36.12, Train accuracy: 0.5782, Val accuracy: 0.5000
True True             False True
Epoch: 6; 237.71 sec; lr: 0.008302; Average loss: 39.90, Train accuracy: 0.5813, Val accuracy: 0.4980
True True             True False
Epoch: 7; 271.72 sec; lr: 0.008302; Average 

[32m[I 2022-11-07 16:04:49,411][0m Trial 106 finished with value: 0.4387215528781794 and parameters: {'layer_count': 204, 'step_size': 6, 'batch_size': 352, 'learning_rate': 0.008889773564242351, 'gamma': 0.9338274580945627}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 11; 407.44 sec; lr: 0.007752; Average loss: 95.47, Train accuracy: 0.5552, Val accuracy: 0.4980
True True             True True
pruned
Epoch: 0; 34.05 sec; lr: 0.007000; Average loss: 294.92, Train accuracy: 0.5580, Val accuracy: 0.5000
Epoch: 1; 67.90 sec; lr: 0.007000; Average loss: 170.50, Train accuracy: 0.5306, Val accuracy: 0.5100
Epoch: 2; 101.76 sec; lr: 0.007000; Average loss: 30.46, Train accuracy: 0.5925, Val accuracy: 0.4980
Epoch: 3; 135.63 sec; lr: 0.005478; Average loss: 171.66, Train accuracy: 0.5302, Val accuracy: 0.5341
Epoch: 4; 169.48 sec; lr: 0.005478; Average loss: 25.22, Train accuracy: 0.5992, Val accuracy: 0.5000
True False             True False
Epoch: 5; 203.40 sec; lr: 0.005478; Average loss: 62.43, Train accuracy: 0.5710, Val accuracy: 0.5040
False True             False True
Epoch: 6; 237.28 sec; lr: 0.005478; Average loss: 53.98, Train accuracy: 0.5696, Val accuracy: 0.5442
False False             True False
Epoch: 7; 271.11 sec; lr: 0.004287; Aver

[32m[I 2022-11-07 16:21:47,345][0m Trial 107 finished with value: 0.5197791164658634 and parameters: {'layer_count': 206, 'step_size': 4, 'batch_size': 288, 'learning_rate': 0.007000471600526902, 'gamma': 0.7825137633725077}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1015.86 sec; lr: 0.001258; Average loss: 10.84, Train accuracy: 0.6115, Val accuracy: 0.5241
True False             False True
Epoch: 0; 34.06 sec; lr: 0.007611; Average loss: 266.14, Train accuracy: 0.5598, Val accuracy: 0.5060
Epoch: 1; 68.00 sec; lr: 0.007611; Average loss: 49.07, Train accuracy: 0.5683, Val accuracy: 0.5060
Epoch: 2; 101.95 sec; lr: 0.007611; Average loss: 26.06, Train accuracy: 0.5788, Val accuracy: 0.4960
Epoch: 3; 136.00 sec; lr: 0.004015; Average loss: 45.14, Train accuracy: 0.5655, Val accuracy: 0.4960
Epoch: 4; 169.94 sec; lr: 0.004015; Average loss: 27.70, Train accuracy: 0.5724, Val accuracy: 0.5141
False True             True True
Epoch: 5; 203.88 sec; lr: 0.004015; Average loss: 11.80, Train accuracy: 0.5906, Val accuracy: 0.5000
True False             True True
Epoch: 6; 237.87 sec; lr: 0.004015; Average loss: 14.29, Train accuracy: 0.5847, Val accuracy: 0.5060
False True             False True
Epoch: 7; 271.81 sec; lr: 0.002118; Average loss:

[32m[I 2022-11-07 16:38:49,055][0m Trial 108 finished with value: 0.5336345381526104 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 224, 'learning_rate': 0.0076112791674233715, 'gamma': 0.5275604465714963}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1019.59 sec; lr: 0.000087; Average loss: 2.62, Train accuracy: 0.5998, Val accuracy: 0.5502
False True             False True
Epoch: 0; 34.20 sec; lr: 0.007538; Average loss: 573.57, Train accuracy: 0.5276, Val accuracy: 0.5000
Epoch: 1; 68.35 sec; lr: 0.007538; Average loss: 89.52, Train accuracy: 0.5384, Val accuracy: 0.5000
Epoch: 2; 102.45 sec; lr: 0.007538; Average loss: 123.58, Train accuracy: 0.5373, Val accuracy: 0.5000
Epoch: 3; 136.54 sec; lr: 0.003981; Average loss: 159.90, Train accuracy: 0.5172, Val accuracy: 0.4940
Epoch: 4; 170.65 sec; lr: 0.003981; Average loss: 26.03, Train accuracy: 0.5376, Val accuracy: 0.4980
False True             True True
Epoch: 5; 204.76 sec; lr: 0.003981; Average loss: 16.17, Train accuracy: 0.5554, Val accuracy: 0.4880
True False             True True
Epoch: 6; 238.87 sec; lr: 0.003981; Average loss: 16.03, Train accuracy: 0.5650, Val accuracy: 0.5000
False True             False True
Epoch: 7; 272.99 sec; lr: 0.002102; Average loss

[32m[I 2022-11-07 16:55:55,265][0m Trial 109 finished with value: 0.5130856760374833 and parameters: {'layer_count': 202, 'step_size': 4, 'batch_size': 224, 'learning_rate': 0.007538297524905402, 'gamma': 0.5280636364436682}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1024.14 sec; lr: 0.000086; Average loss: 0.89, Train accuracy: 0.6142, Val accuracy: 0.5181
True False             True False
Epoch: 0; 34.11 sec; lr: 0.009130; Average loss: 270.98, Train accuracy: 0.5440, Val accuracy: 0.5000
Epoch: 1; 68.01 sec; lr: 0.009130; Average loss: 99.15, Train accuracy: 0.5670, Val accuracy: 0.5000
Epoch: 2; 101.90 sec; lr: 0.009130; Average loss: 82.87, Train accuracy: 0.5441, Val accuracy: 0.5201
Epoch: 3; 135.84 sec; lr: 0.004978; Average loss: 34.22, Train accuracy: 0.5822, Val accuracy: 0.5442
Epoch: 4; 169.71 sec; lr: 0.004978; Average loss: 17.40, Train accuracy: 0.6080, Val accuracy: 0.4980
True False             False True
Epoch: 5; 203.69 sec; lr: 0.004978; Average loss: 39.21, Train accuracy: 0.5513, Val accuracy: 0.5060
False True             False False
Epoch: 6; 237.54 sec; lr: 0.004978; Average loss: 50.55, Train accuracy: 0.5315, Val accuracy: 0.5261
False False             True False
Epoch: 7; 271.43 sec; lr: 0.002715; Average lo

[32m[I 2022-11-07 17:12:54,643][0m Trial 110 finished with value: 0.5207496653279785 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 608, 'learning_rate': 0.009130379102314108, 'gamma': 0.5452637305420175}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1017.28 sec; lr: 0.000131; Average loss: 5.50, Train accuracy: 0.6085, Val accuracy: 0.5261
False False             True True
Epoch: 0; 34.21 sec; lr: 0.009691; Average loss: 519.38, Train accuracy: 0.5164, Val accuracy: 0.5000
Epoch: 1; 68.21 sec; lr: 0.009691; Average loss: 140.05, Train accuracy: 0.5676, Val accuracy: 0.5060
Epoch: 2; 102.07 sec; lr: 0.009691; Average loss: 38.94, Train accuracy: 0.5844, Val accuracy: 0.5000
Epoch: 3; 135.95 sec; lr: 0.005387; Average loss: 97.64, Train accuracy: 0.5380, Val accuracy: 0.5060
Epoch: 4; 169.91 sec; lr: 0.005387; Average loss: 34.83, Train accuracy: 0.5978, Val accuracy: 0.5141
False False             True False
Epoch: 5; 203.82 sec; lr: 0.005387; Average loss: 46.55, Train accuracy: 0.5448, Val accuracy: 0.5382
False False             False True
Epoch: 6; 237.80 sec; lr: 0.005387; Average loss: 23.37, Train accuracy: 0.6192, Val accuracy: 0.5562
False False             False False
Epoch: 7; 271.75 sec; lr: 0.002995; Average

[32m[I 2022-11-07 17:29:55,806][0m Trial 111 finished with value: 0.5249163319946453 and parameters: {'layer_count': 204, 'step_size': 4, 'batch_size': 608, 'learning_rate': 0.009691376194777779, 'gamma': 0.5558875398029607}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1019.10 sec; lr: 0.000159; Average loss: 5.42, Train accuracy: 0.6010, Val accuracy: 0.5141
True True             False True
Epoch: 0; 34.13 sec; lr: 0.009761; Average loss: 633.26, Train accuracy: 0.5261, Val accuracy: 0.5000
Epoch: 1; 68.03 sec; lr: 0.009761; Average loss: 179.32, Train accuracy: 0.5701, Val accuracy: 0.5060
Epoch: 2; 101.95 sec; lr: 0.009761; Average loss: 95.59, Train accuracy: 0.5501, Val accuracy: 0.4980
Epoch: 3; 135.92 sec; lr: 0.005477; Average loss: 41.45, Train accuracy: 0.5920, Val accuracy: 0.5080
Epoch: 4; 169.85 sec; lr: 0.005477; Average loss: 66.85, Train accuracy: 0.5486, Val accuracy: 0.4920
True False             True False
Epoch: 5; 203.85 sec; lr: 0.005477; Average loss: 51.44, Train accuracy: 0.5573, Val accuracy: 0.5141
False True             False True
Epoch: 6; 237.78 sec; lr: 0.005477; Average loss: 24.47, Train accuracy: 0.5939, Val accuracy: 0.5201
False False             True False
Epoch: 7; 271.77 sec; lr: 0.003073; Average los

[32m[I 2022-11-07 17:46:56,838][0m Trial 112 finished with value: 0.5227074966532798 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 608, 'learning_rate': 0.009760665098233411, 'gamma': 0.561112879640209}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1018.95 sec; lr: 0.000171; Average loss: 5.24, Train accuracy: 0.6007, Val accuracy: 0.5301
True False             False False
Epoch: 0; 34.08 sec; lr: 0.009659; Average loss: 498.15, Train accuracy: 0.5551, Val accuracy: 0.4980
Epoch: 1; 67.97 sec; lr: 0.009659; Average loss: 152.76, Train accuracy: 0.5525, Val accuracy: 0.4980
Epoch: 2; 102.01 sec; lr: 0.009659; Average loss: 162.68, Train accuracy: 0.5159, Val accuracy: 0.5141
Epoch: 3; 136.00 sec; lr: 0.005344; Average loss: 68.05, Train accuracy: 0.5584, Val accuracy: 0.5341
Epoch: 4; 170.01 sec; lr: 0.005344; Average loss: 31.63, Train accuracy: 0.5858, Val accuracy: 0.5301
True False             False True
Epoch: 5; 204.01 sec; lr: 0.005344; Average loss: 45.69, Train accuracy: 0.5561, Val accuracy: 0.5201
True True             False False
Epoch: 6; 238.12 sec; lr: 0.005344; Average loss: 16.81, Train accuracy: 0.5988, Val accuracy: 0.5020
True True             True False


[32m[I 2022-11-07 17:51:31,112][0m Trial 113 finished with value: 0.4198795180722891 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 416, 'learning_rate': 0.009658970026293177, 'gamma': 0.5532708660169858}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 7; 272.06 sec; lr: 0.002957; Average loss: 61.68, Train accuracy: 0.5527, Val accuracy: 0.5000
True True             True True
pruned
Epoch: 0; 33.99 sec; lr: 0.009634; Average loss: 450.68, Train accuracy: 0.5488, Val accuracy: 0.4980
Epoch: 1; 67.83 sec; lr: 0.009634; Average loss: 196.79, Train accuracy: 0.5410, Val accuracy: 0.5000
Epoch: 2; 101.80 sec; lr: 0.009634; Average loss: 198.91, Train accuracy: 0.5450, Val accuracy: 0.4859
Epoch: 3; 135.68 sec; lr: 0.005424; Average loss: 130.74, Train accuracy: 0.5578, Val accuracy: 0.5281
Epoch: 4; 169.55 sec; lr: 0.005424; Average loss: 49.14, Train accuracy: 0.5786, Val accuracy: 0.5221
True False             True False
Epoch: 5; 203.57 sec; lr: 0.005424; Average loss: 51.19, Train accuracy: 0.5772, Val accuracy: 0.5020
True True             False True
Epoch: 6; 237.44 sec; lr: 0.005424; Average loss: 64.57, Train accuracy: 0.5560, Val accuracy: 0.4940
True True             True False
Epoch: 7; 271.29 sec; lr: 0.003054; Average

[32m[I 2022-11-07 18:08:30,219][0m Trial 114 finished with value: 0.5200133868808567 and parameters: {'layer_count': 207, 'step_size': 4, 'batch_size': 480, 'learning_rate': 0.00963431790340741, 'gamma': 0.5630201453655909}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1016.94 sec; lr: 0.000173; Average loss: 6.73, Train accuracy: 0.6184, Val accuracy: 0.5221
False True             True False


[32m[I 2022-11-07 18:08:33,195][0m Trial 115 finished with value: 0.0 and parameters: {'layer_count': 206, 'step_size': 4, 'batch_size': 672, 'learning_rate': 0.00981352252802396, 'gamma': 0.5793509899420943}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 964.00 MiB (GPU 0; 8.00 GiB total capacity; 4.49 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 18:08:35,904][0m Trial 116 finished with value: 0.0 and parameters: {'layer_count': 203, 'step_size': 4, 'batch_size': 864, 'learning_rate': 0.0010774955182759616, 'gamma': 0.6427674046353028}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 1.21 GiB (GPU 0; 8.00 GiB total capacity; 5.22 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.96 sec; lr: 0.009325; Average loss: 403.74, Train accuracy: 0.5236, Val accuracy: 0.5000
Epoch: 1; 67.77 sec; lr: 0.009325; Average loss: 156.31, Train accuracy: 0.5697, Val accuracy: 0.5020
Epoch: 2; 101.67 sec; lr: 0.009325; Average loss: 48.72, Train accuracy: 0.5814, Val accuracy: 0.5040
Epoch: 3; 135.58 sec; lr: 0.005009; Average loss: 41.76, Train accuracy: 0.5647, Val accuracy: 0.5040
Epoch: 4; 169.44 sec; lr: 0.005009; Average loss: 31.25, Train accuracy: 0.5777, Val accuracy: 0.5040
True True             False False
Epoch: 5; 203.40 sec; lr: 0.005009; Average loss: 31.46, Train accuracy: 0.5733, Val accuracy: 0.4980
True True        

[32m[I 2022-11-07 18:22:45,765][0m Trial 117 finished with value: 0.5057329317269076 and parameters: {'layer_count': 205, 'step_size': 4, 'batch_size': 608, 'learning_rate': 0.009324978873468288, 'gamma': 0.5371988432811892}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 24; 847.85 sec; lr: 0.000224; Average loss: 6.83, Train accuracy: 0.5945, Val accuracy: 0.5261
True True             True True
pruned
Epoch: 0; 34.07 sec; lr: 0.009500; Average loss: 570.69, Train accuracy: 0.5541, Val accuracy: 0.4980
Epoch: 1; 67.96 sec; lr: 0.009500; Average loss: 134.65, Train accuracy: 0.5395, Val accuracy: 0.5141
Epoch: 2; 101.79 sec; lr: 0.009500; Average loss: 86.57, Train accuracy: 0.5714, Val accuracy: 0.5020
Epoch: 3; 135.64 sec; lr: 0.005879; Average loss: 96.50, Train accuracy: 0.5458, Val accuracy: 0.4980
Epoch: 4; 169.46 sec; lr: 0.005879; Average loss: 110.36, Train accuracy: 0.5440, Val accuracy: 0.4940
True True             True False
Epoch: 5; 203.36 sec; lr: 0.005879; Average loss: 64.40, Train accuracy: 0.5443, Val accuracy: 0.5141
False True             True True
Epoch: 6; 237.23 sec; lr: 0.005879; Average loss: 27.81, Train accuracy: 0.5978, Val accuracy: 0.5341
False False             True True
Epoch: 7; 271.18 sec; lr: 0.003638; Average 

[32m[I 2022-11-07 18:39:43,160][0m Trial 118 finished with value: 0.5258534136546185 and parameters: {'layer_count': 206, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.009499713609286569, 'gamma': 0.6188136196877351}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1015.29 sec; lr: 0.000330; Average loss: 7.17, Train accuracy: 0.5940, Val accuracy: 0.5321
False False             False True
Epoch: 0; 33.67 sec; lr: 0.009126; Average loss: 464.28, Train accuracy: 0.5395, Val accuracy: 0.5161
Epoch: 1; 67.24 sec; lr: 0.009126; Average loss: 119.58, Train accuracy: 0.5425, Val accuracy: 0.5361
Epoch: 2; 100.73 sec; lr: 0.009126; Average loss: 37.47, Train accuracy: 0.5985, Val accuracy: 0.5020
Epoch: 3; 134.19 sec; lr: 0.005614; Average loss: 116.89, Train accuracy: 0.5405, Val accuracy: 0.4960
Epoch: 4; 167.73 sec; lr: 0.005614; Average loss: 114.28, Train accuracy: 0.5372, Val accuracy: 0.4980
False True             True False
Epoch: 5; 201.18 sec; lr: 0.005614; Average loss: 112.09, Train accuracy: 0.5324, Val accuracy: 0.5000
False False             True True
Epoch: 6; 234.62 sec; lr: 0.005614; Average loss: 65.84, Train accuracy: 0.5358, Val accuracy: 0.5482
False False             False True
Epoch: 7; 268.05 sec; lr: 0.003454; Averag

[32m[I 2022-11-07 18:56:29,380][0m Trial 119 finished with value: 0.5266398929049532 and parameters: {'layer_count': 208, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.009126390879429212, 'gamma': 0.6151932562632767}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1004.08 sec; lr: 0.000304; Average loss: 6.83, Train accuracy: 0.5844, Val accuracy: 0.5221
True False             True True
Epoch: 0; 33.69 sec; lr: 0.009485; Average loss: 596.11, Train accuracy: 0.5427, Val accuracy: 0.5000
Epoch: 1; 67.13 sec; lr: 0.009485; Average loss: 169.81, Train accuracy: 0.5385, Val accuracy: 0.5040
Epoch: 2; 100.50 sec; lr: 0.009485; Average loss: 87.18, Train accuracy: 0.5273, Val accuracy: 0.5020
Epoch: 3; 133.91 sec; lr: 0.005881; Average loss: 92.57, Train accuracy: 0.5380, Val accuracy: 0.4960
Epoch: 4; 167.36 sec; lr: 0.005881; Average loss: 65.88, Train accuracy: 0.5372, Val accuracy: 0.5141
False True             True False
Epoch: 5; 200.73 sec; lr: 0.005881; Average loss: 23.40, Train accuracy: 0.6129, Val accuracy: 0.5382
False False             True True
Epoch: 6; 234.16 sec; lr: 0.005881; Average loss: 23.47, Train accuracy: 0.5849, Val accuracy: 0.5000
True False             False True
Epoch: 7; 267.54 sec; lr: 0.003647; Average loss

[32m[I 2022-11-07 19:13:16,507][0m Trial 120 finished with value: 0.5193607764390897 and parameters: {'layer_count': 210, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.009485323313230109, 'gamma': 0.6200402030409188}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1004.93 sec; lr: 0.000334; Average loss: 6.61, Train accuracy: 0.5997, Val accuracy: 0.5181
False True             False True


[32m[I 2022-11-07 19:13:19,306][0m Trial 121 finished with value: 0.0 and parameters: {'layer_count': 206, 'step_size': 4, 'batch_size': 672, 'learning_rate': 0.008698361375567757, 'gamma': 0.607091451241899}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 964.00 MiB (GPU 0; 8.00 GiB total capacity; 4.49 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF


[32m[I 2022-11-07 19:13:21,897][0m Trial 122 finished with value: 0.0 and parameters: {'layer_count': 208, 'step_size': 4, 'batch_size': 800, 'learning_rate': 0.008996636709672677, 'gamma': 0.5986960978317508}. Best is trial 54 with value: 0.5353580990629183.[0m


Exception: CUDA out of memory. Tried to allocate 1.12 GiB (GPU 0; 8.00 GiB total capacity; 4.87 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.76 sec; lr: 0.009530; Average loss: 424.32, Train accuracy: 0.5601, Val accuracy: 0.5000
Epoch: 1; 67.37 sec; lr: 0.009530; Average loss: 162.46, Train accuracy: 0.5155, Val accuracy: 0.5321
Epoch: 2; 100.93 sec; lr: 0.009530; Average loss: 67.84, Train accuracy: 0.5571, Val accuracy: 0.5281
Epoch: 3; 134.58 sec; lr: 0.005329; Average loss: 40.12, Train accuracy: 0.5926, Val accuracy: 0.4940
Epoch: 4; 168.20 sec; lr: 0.005329; Average loss: 62.48, Train accuracy: 0.5575, Val accuracy: 0.5000
False True             True False
Epoch: 5; 201.79 sec; lr: 0.005329; Average loss: 96.30, Train accuracy: 0.5297, Val accuracy: 0.5080
False False      

[32m[I 2022-11-07 19:30:13,481][0m Trial 123 finished with value: 0.5264223560910307 and parameters: {'layer_count': 207, 'step_size': 4, 'batch_size': 480, 'learning_rate': 0.009529917724267827, 'gamma': 0.5591519951415076}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1009.60 sec; lr: 0.000163; Average loss: 6.39, Train accuracy: 0.6088, Val accuracy: 0.5241
False True             False True
Epoch: 0; 33.79 sec; lr: 0.009210; Average loss: 603.74, Train accuracy: 0.5470, Val accuracy: 0.5020
Epoch: 1; 67.46 sec; lr: 0.009210; Average loss: 228.35, Train accuracy: 0.5149, Val accuracy: 0.4960
Epoch: 2; 101.07 sec; lr: 0.009210; Average loss: 61.20, Train accuracy: 0.5766, Val accuracy: 0.5000
Epoch: 3; 134.69 sec; lr: 0.005183; Average loss: 74.40, Train accuracy: 0.5634, Val accuracy: 0.5000
Epoch: 4; 168.38 sec; lr: 0.005183; Average loss: 65.42, Train accuracy: 0.5337, Val accuracy: 0.5341
False True             False True
Epoch: 5; 201.99 sec; lr: 0.005183; Average loss: 41.85, Train accuracy: 0.5632, Val accuracy: 0.5000
True False             True False
Epoch: 6; 235.66 sec; lr: 0.005183; Average loss: 53.76, Train accuracy: 0.5403, Val accuracy: 0.5060
False True             False True
Epoch: 7; 269.37 sec; lr: 0.002917; Average los

[32m[I 2022-11-07 19:47:04,213][0m Trial 124 finished with value: 0.5251506024096385 and parameters: {'layer_count': 207, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.00920970983318697, 'gamma': 0.5627486070234367}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1008.66 sec; lr: 0.000165; Average loss: 6.94, Train accuracy: 0.6010, Val accuracy: 0.5261
False False             True False
Epoch: 0; 33.69 sec; lr: 0.009149; Average loss: 584.73, Train accuracy: 0.5408, Val accuracy: 0.5000
Epoch: 1; 67.22 sec; lr: 0.009149; Average loss: 186.04, Train accuracy: 0.5369, Val accuracy: 0.5261
Epoch: 2; 100.83 sec; lr: 0.009149; Average loss: 83.05, Train accuracy: 0.5468, Val accuracy: 0.5060
Epoch: 3; 134.38 sec; lr: 0.005138; Average loss: 68.28, Train accuracy: 0.5637, Val accuracy: 0.4960
Epoch: 4; 167.91 sec; lr: 0.005138; Average loss: 62.42, Train accuracy: 0.5533, Val accuracy: 0.5522
False True             True False
Epoch: 5; 201.45 sec; lr: 0.005138; Average loss: 21.96, Train accuracy: 0.5978, Val accuracy: 0.5161
True False             True True
Epoch: 6; 235.06 sec; lr: 0.005138; Average loss: 26.59, Train accuracy: 0.5734, Val accuracy: 0.5100
True True             False True
Epoch: 7; 268.61 sec; lr: 0.002885; Average loss

[32m[I 2022-11-07 20:03:53,297][0m Trial 125 finished with value: 0.5278279785809906 and parameters: {'layer_count': 207, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.00914935630679525, 'gamma': 0.5615457943082319}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1007.01 sec; lr: 0.000161; Average loss: 6.54, Train accuracy: 0.5971, Val accuracy: 0.5301
False False             True True
Epoch: 0; 33.64 sec; lr: 0.009215; Average loss: 455.39, Train accuracy: 0.5614, Val accuracy: 0.5020
Epoch: 1; 67.15 sec; lr: 0.009215; Average loss: 187.16, Train accuracy: 0.5224, Val accuracy: 0.4980
Epoch: 2; 100.71 sec; lr: 0.009215; Average loss: 45.36, Train accuracy: 0.5849, Val accuracy: 0.4980
Epoch: 3; 134.19 sec; lr: 0.005330; Average loss: 173.53, Train accuracy: 0.5246, Val accuracy: 0.5020
Epoch: 4; 167.77 sec; lr: 0.005330; Average loss: 53.55, Train accuracy: 0.6034, Val accuracy: 0.5301
False False             True True
Epoch: 5; 201.29 sec; lr: 0.005330; Average loss: 47.33, Train accuracy: 0.5815, Val accuracy: 0.5080
True False             False True
Epoch: 6; 234.80 sec; lr: 0.005330; Average loss: 21.12, Train accuracy: 0.6106, Val accuracy: 0.5040
True True             False False
Epoch: 7; 268.38 sec; lr: 0.003083; Average lo

[32m[I 2022-11-07 20:20:41,298][0m Trial 126 finished with value: 0.5136378848728247 and parameters: {'layer_count': 209, 'step_size': 4, 'batch_size': 480, 'learning_rate': 0.009214780863458972, 'gamma': 0.5784071541603919}. Best is trial 54 with value: 0.5353580990629183.[0m


Epoch: 29; 1005.88 sec; lr: 0.000200; Average loss: 6.73, Train accuracy: 0.6036, Val accuracy: 0.5040
False True             True True
Epoch: 0; 33.69 sec; lr: 0.009456; Average loss: 606.34, Train accuracy: 0.5421, Val accuracy: 0.4960
Epoch: 1; 67.21 sec; lr: 0.009456; Average loss: 185.44, Train accuracy: 0.5399, Val accuracy: 0.5241
Epoch: 2; 100.76 sec; lr: 0.009456; Average loss: 90.99, Train accuracy: 0.5425, Val accuracy: 0.5402
Epoch: 3; 134.38 sec; lr: 0.005542; Average loss: 53.97, Train accuracy: 0.5797, Val accuracy: 0.5402
Epoch: 4; 167.95 sec; lr: 0.005542; Average loss: 64.19, Train accuracy: 0.5528, Val accuracy: 0.5281
True True             False False
Epoch: 5; 201.52 sec; lr: 0.005542; Average loss: 46.37, Train accuracy: 0.5499, Val accuracy: 0.5020
True True             True False
Epoch: 6; 235.17 sec; lr: 0.005542; Average loss: 56.42, Train accuracy: 0.5362, Val accuracy: 0.5241
False True             True True
Epoch: 7; 268.75 sec; lr: 0.003249; Average loss: 

[32m[I 2022-11-07 20:37:32,881][0m Trial 127 finished with value: 0.5431559571619813 and parameters: {'layer_count': 207, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.009455928480805944, 'gamma': 0.5861260817780743}. Best is trial 127 with value: 0.5431559571619813.[0m


Epoch: 29; 1009.44 sec; lr: 0.000225; Average loss: 6.32, Train accuracy: 0.6034, Val accuracy: 0.5522
False True             False True
Epoch: 0; 33.83 sec; lr: 0.009447; Average loss: 501.62, Train accuracy: 0.5359, Val accuracy: 0.4920
Epoch: 1; 67.48 sec; lr: 0.009447; Average loss: 109.95, Train accuracy: 0.5460, Val accuracy: 0.5382
Epoch: 2; 101.13 sec; lr: 0.009447; Average loss: 44.26, Train accuracy: 0.5773, Val accuracy: 0.4920
Epoch: 3; 134.89 sec; lr: 0.005818; Average loss: 60.97, Train accuracy: 0.5560, Val accuracy: 0.5000
Epoch: 4; 168.53 sec; lr: 0.005818; Average loss: 84.05, Train accuracy: 0.5228, Val accuracy: 0.5321
False False             True False
Epoch: 5; 202.21 sec; lr: 0.005818; Average loss: 24.57, Train accuracy: 0.5924, Val accuracy: 0.5020
True False             False True
Epoch: 6; 235.95 sec; lr: 0.005818; Average loss: 69.52, Train accuracy: 0.5326, Val accuracy: 0.5261
False True             False False
Epoch: 7; 269.61 sec; lr: 0.003584; Average l

[32m[I 2022-11-07 20:54:26,002][0m Trial 128 finished with value: 0.5363286479250334 and parameters: {'layer_count': 207, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.009446526063689013, 'gamma': 0.6159347335181243}. Best is trial 127 with value: 0.5431559571619813.[0m


Epoch: 29; 1011.02 sec; lr: 0.000318; Average loss: 7.22, Train accuracy: 0.5971, Val accuracy: 0.5422
False True             False True
Epoch: 0; 33.79 sec; lr: 0.009459; Average loss: 607.39, Train accuracy: 0.5417, Val accuracy: 0.5000
Epoch: 1; 67.55 sec; lr: 0.009459; Average loss: 183.54, Train accuracy: 0.5372, Val accuracy: 0.5241
Epoch: 2; 101.22 sec; lr: 0.009459; Average loss: 91.19, Train accuracy: 0.5393, Val accuracy: 0.5301
Epoch: 3; 134.90 sec; lr: 0.005868; Average loss: 75.79, Train accuracy: 0.5575, Val accuracy: 0.4980
Epoch: 4; 168.65 sec; lr: 0.005868; Average loss: 151.99, Train accuracy: 0.5183, Val accuracy: 0.5141
False True             False False
Epoch: 5; 202.34 sec; lr: 0.005868; Average loss: 55.19, Train accuracy: 0.5573, Val accuracy: 0.5562
False False             True False
Epoch: 6; 236.02 sec; lr: 0.005868; Average loss: 21.86, Train accuracy: 0.6059, Val accuracy: 0.5221
True False             False True
Epoch: 7; 269.76 sec; lr: 0.003640; Average 

[32m[I 2022-11-07 21:07:57,766][0m Trial 129 finished with value: 0.5041373828647925 and parameters: {'layer_count': 208, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.009458982514169788, 'gamma': 0.620339371456096}. Best is trial 127 with value: 0.5431559571619813.[0m


Epoch: 23; 809.67 sec; lr: 0.000539; Average loss: 8.86, Train accuracy: 0.5807, Val accuracy: 0.5361
True True             True True
pruned
Epoch: 0; 33.90 sec; lr: 0.009078; Average loss: 568.25, Train accuracy: 0.5463, Val accuracy: 0.5060
Epoch: 1; 67.74 sec; lr: 0.009078; Average loss: 227.30, Train accuracy: 0.5137, Val accuracy: 0.5040
Epoch: 2; 101.61 sec; lr: 0.009078; Average loss: 65.90, Train accuracy: 0.5637, Val accuracy: 0.5020
Epoch: 3; 135.45 sec; lr: 0.005335; Average loss: 108.56, Train accuracy: 0.5176, Val accuracy: 0.5080
Epoch: 4; 169.45 sec; lr: 0.005335; Average loss: 35.60, Train accuracy: 0.5829, Val accuracy: 0.5141
False False             True True
Epoch: 5; 203.59 sec; lr: 0.005335; Average loss: 43.97, Train accuracy: 0.5682, Val accuracy: 0.4980
True False             False True
Epoch: 6; 237.46 sec; lr: 0.005335; Average loss: 49.26, Train accuracy: 0.5607, Val accuracy: 0.5000
False True             False False
Epoch: 7; 271.30 sec; lr: 0.003135; Avera

[32m[I 2022-11-07 21:24:55,109][0m Trial 130 finished with value: 0.5193607764390897 and parameters: {'layer_count': 207, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.009077974629309028, 'gamma': 0.5876776804271614}. Best is trial 127 with value: 0.5431559571619813.[0m


Epoch: 29; 1015.26 sec; lr: 0.000220; Average loss: 6.36, Train accuracy: 0.6061, Val accuracy: 0.5161
True True             False False


[32m[I 2022-11-07 21:24:57,854][0m Trial 131 finished with value: 0.0 and parameters: {'layer_count': 207, 'step_size': 4, 'batch_size': 736, 'learning_rate': 0.009472724274080075, 'gamma': 0.568069646857529}. Best is trial 127 with value: 0.5431559571619813.[0m


Exception: CUDA out of memory. Tried to allocate 1.03 GiB (GPU 0; 8.00 GiB total capacity; 4.51 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.96 sec; lr: 0.009886; Average loss: 333.91, Train accuracy: 0.5375, Val accuracy: 0.5040
Epoch: 1; 67.70 sec; lr: 0.009886; Average loss: 52.37, Train accuracy: 0.5649, Val accuracy: 0.5341
Epoch: 2; 101.51 sec; lr: 0.009886; Average loss: 41.31, Train accuracy: 0.5630, Val accuracy: 0.5060
Epoch: 3; 135.22 sec; lr: 0.005479; Average loss: 44.48, Train accuracy: 0.5678, Val accuracy: 0.5422
Epoch: 4; 168.95 sec; lr: 0.005479; Average loss: 31.45, Train accuracy: 0.5838, Val accuracy: 0.5301
True False             True False
Epoch: 5; 202.78 sec; lr: 0.005479; Average loss: 21.49, Train accuracy: 0.5929, Val accuracy: 0.5080
True True         

[32m[I 2022-11-07 21:41:52,019][0m Trial 132 finished with value: 0.5259538152610441 and parameters: {'layer_count': 208, 'step_size': 4, 'batch_size': 608, 'learning_rate': 0.0098862667079045, 'gamma': 0.5542293413069889}. Best is trial 127 with value: 0.5431559571619813.[0m


Epoch: 29; 1012.13 sec; lr: 0.000159; Average loss: 5.41, Train accuracy: 0.6044, Val accuracy: 0.5201
False True             True True
Epoch: 0; 33.85 sec; lr: 0.009835; Average loss: 350.54, Train accuracy: 0.5432, Val accuracy: 0.4980
Epoch: 1; 67.63 sec; lr: 0.009835; Average loss: 61.68, Train accuracy: 0.5788, Val accuracy: 0.5020
Epoch: 2; 101.32 sec; lr: 0.009835; Average loss: 96.26, Train accuracy: 0.5428, Val accuracy: 0.5502
Epoch: 3; 135.08 sec; lr: 0.005429; Average loss: 53.64, Train accuracy: 0.5691, Val accuracy: 0.5060
Epoch: 4; 168.76 sec; lr: 0.005429; Average loss: 53.95, Train accuracy: 0.5572, Val accuracy: 0.5221
False True             False False
Epoch: 5; 202.45 sec; lr: 0.005429; Average loss: 20.60, Train accuracy: 0.6029, Val accuracy: 0.5422
False False             True False
Epoch: 6; 236.24 sec; lr: 0.005429; Average loss: 57.61, Train accuracy: 0.5644, Val accuracy: 0.5120
True False             False True
Epoch: 7; 269.94 sec; lr: 0.002997; Average los

[32m[I 2022-11-07 21:58:45,783][0m Trial 133 finished with value: 0.5257697456492637 and parameters: {'layer_count': 209, 'step_size': 4, 'batch_size': 608, 'learning_rate': 0.009834878741056612, 'gamma': 0.5520464190087694}. Best is trial 127 with value: 0.5431559571619813.[0m


Epoch: 29; 1011.58 sec; lr: 0.000154; Average loss: 6.40, Train accuracy: 0.6114, Val accuracy: 0.5281
True False             True True
Epoch: 0; 33.90 sec; lr: 0.009452; Average loss: 379.54, Train accuracy: 0.5281, Val accuracy: 0.5000
Epoch: 1; 67.56 sec; lr: 0.009452; Average loss: 119.61, Train accuracy: 0.5704, Val accuracy: 0.5361
Epoch: 2; 101.21 sec; lr: 0.009452; Average loss: 38.96, Train accuracy: 0.5852, Val accuracy: 0.5020
Epoch: 3; 134.95 sec; lr: 0.005485; Average loss: 47.73, Train accuracy: 0.5637, Val accuracy: 0.5120
Epoch: 4; 168.62 sec; lr: 0.005485; Average loss: 73.26, Train accuracy: 0.5588, Val accuracy: 0.4980
True False             True False
Epoch: 5; 202.29 sec; lr: 0.005485; Average loss: 134.83, Train accuracy: 0.5401, Val accuracy: 0.5000
False True             False True
Epoch: 6; 235.98 sec; lr: 0.005485; Average loss: 95.36, Train accuracy: 0.5222, Val accuracy: 0.5221
False False             True False
Epoch: 7; 269.65 sec; lr: 0.003183; Average lo

[32m[I 2022-11-07 22:15:38,886][0m Trial 134 finished with value: 0.5204651941097724 and parameters: {'layer_count': 208, 'step_size': 4, 'batch_size': 480, 'learning_rate': 0.009451789666574674, 'gamma': 0.5802696983499939}. Best is trial 127 with value: 0.5431559571619813.[0m


Epoch: 29; 1010.91 sec; lr: 0.000209; Average loss: 6.66, Train accuracy: 0.5995, Val accuracy: 0.5181
False True             False True


[32m[I 2022-11-07 22:15:41,727][0m Trial 135 finished with value: 0.0 and parameters: {'layer_count': 209, 'step_size': 4, 'batch_size': 672, 'learning_rate': 0.00990208335743509, 'gamma': 0.6121902024243027}. Best is trial 127 with value: 0.5431559571619813.[0m


Exception: CUDA out of memory. Tried to allocate 964.00 MiB (GPU 0; 8.00 GiB total capacity; 4.49 GiB already allocated; 0 bytes free; 6.87 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
Epoch: 0; 33.80 sec; lr: 0.008919; Average loss: 321.08, Train accuracy: 0.5672, Val accuracy: 0.5000
Epoch: 1; 67.54 sec; lr: 0.008919; Average loss: 185.33, Train accuracy: 0.5585, Val accuracy: 0.4980
Epoch: 2; 101.21 sec; lr: 0.008919; Average loss: 159.81, Train accuracy: 0.5483, Val accuracy: 0.5040
Epoch: 3; 134.90 sec; lr: 0.005270; Average loss: 78.14, Train accuracy: 0.5786, Val accuracy: 0.5100
Epoch: 4; 168.66 sec; lr: 0.005270; Average loss: 63.77, Train accuracy: 0.5523, Val accuracy: 0.5221
False False             False True
Epoch: 5; 202.36 sec; lr: 0.005270; Average loss: 32.74, Train accuracy: 0.5910, Val accuracy: 0.5100
True False   

[32m[I 2022-11-07 22:32:33,928][0m Trial 136 finished with value: 0.5277443105756359 and parameters: {'layer_count': 209, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.008919415838452821, 'gamma': 0.5908091212665064}. Best is trial 127 with value: 0.5431559571619813.[0m


Epoch: 29; 1010.19 sec; lr: 0.000224; Average loss: 6.81, Train accuracy: 0.6152, Val accuracy: 0.5422
False False             True False
Epoch: 0; 33.65 sec; lr: 0.008755; Average loss: 580.23, Train accuracy: 0.5235, Val accuracy: 0.5000
Epoch: 1; 67.18 sec; lr: 0.008755; Average loss: 205.63, Train accuracy: 0.5833, Val accuracy: 0.5020
Epoch: 2; 100.81 sec; lr: 0.008755; Average loss: 128.78, Train accuracy: 0.5574, Val accuracy: 0.5020
Epoch: 3; 134.37 sec; lr: 0.005190; Average loss: 85.53, Train accuracy: 0.5423, Val accuracy: 0.5301
Epoch: 4; 167.98 sec; lr: 0.005190; Average loss: 25.84, Train accuracy: 0.6095, Val accuracy: 0.5321
False False             True False
Epoch: 5; 201.51 sec; lr: 0.005190; Average loss: 27.00, Train accuracy: 0.5782, Val accuracy: 0.5643
False False             False True
Epoch: 6; 235.12 sec; lr: 0.005190; Average loss: 18.83, Train accuracy: 0.5922, Val accuracy: 0.5301
True False             False False
Epoch: 7; 268.63 sec; lr: 0.003076; Averag

In [None]:
# Summarise the best trial of the Optuna study: its objective value followed by
# the hyper-parameters that produced it.
for caption, best in (("Value:", study.best_value), ("Parameters:", study.best_params)):
    print(caption, best)

In [None]:
#Value: 0.5200803212851406
#Parameters: {'layer_count': 209, 'step_size': 4, 'batch_size': 544, 'learning_rate': 0.0036136998392606355, 'gamma': 0.6342624350988135}

In [None]:
# Always-failing assertion: raises AssertionError when this cell is executed.
# NOTE(review): presumably a deliberate stop so that "Run All" does not continue
# into the cells below -- confirm before removing.
assert False

In [None]:
def objective(trial):
    """Optuna objective: train a 2-class head on a frozen VisualBERT and score it.

    Samples the scheduler step size, batch size, learning rate and decay factor
    from ``trial``, trains for 50 epochs through ``train_model`` and returns the
    validation accuracy of the last epoch.

    Relies on notebook-level names: ``device``, ``loss``, ``train_model``,
    ``features_train_dataset`` and ``features_val_dataset``.

    Args:
        trial: The ``optuna.trial.Trial`` that supplies the hyper-parameters.

    Returns:
        The last entry of the validation history returned by ``train_model``,
        or 0 when training raised (e.g. CUDA out of memory) or produced an
        empty validation history.

    Raises:
        optuna.exceptions.TrialPruned: If the trial is pruned, either by the
            initial check or from inside ``train_model``.
    """
    # NOTE(review): this function never reports intermediate values for
    # `trial`, so this check presumably cannot fire unless something else
    # reports for the trial -- confirm whether train_model should get `trial`.
    if trial.should_prune():
        print("pruned")
        raise optuna.exceptions.TrialPruned()

    #layer_count = trial.suggest_int("layer_count", 180, 212, 2)
    # `step` is passed by keyword: newer Optuna releases deprecate passing it
    # positionally.
    step_size = trial.suggest_int("step_size", 4, 10, step=2)
    batch_size = trial.suggest_int("batch_size", 32, 544, step=64)
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 1e-2)
    gamma = trial.suggest_float("gamma", 0.5, 1)

    model = VisualBertForPreTraining.from_pretrained('uclanlp/visualbert-nlvr2-coco-pre') # this checkpoint has 1024 dimensional visual embeddings projection

    # Freeze every pretrained parameter.  (An earlier variant, kept in the
    # commented `layer_count` suggestion above, stopped freezing after
    # `layer_count` parameters.)
    for param in model.parameters():
        param.requires_grad = False

    # The replacement decoder is created after the freeze, so its weight and
    # bias are the only parameters left with requires_grad=True.
    model.cls.predictions.decoder = torch.nn.Linear(in_features=768, out_features=2, bias=True)
    model = model.to(device)

    params = [param for param in model.parameters() if param.requires_grad]

    optimizer = torch.optim.Adam(params, lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    try:
        loss_history, train_history, val_history, best_model_name = train_model(
            model,
            DataLoader(features_train_dataset, batch_size=batch_size),
            DataLoader(features_val_dataset, batch_size=500),
            loss, optimizer, 50, scheduler, device)

    except optuna.exceptions.TrialPruned:
        # Pruning is a control signal for Optuna, not a failure: it must not be
        # swallowed by the generic handler below and recorded as a 0 score.
        raise

    except Exception as ex:
        # Best effort: a failed configuration (typically CUDA OOM for large
        # batch sizes) is scored 0 so the study can continue.
        print('Exception:', ex)
        return 0

    finally:
        # Drop this trial's GPU objects and return cached blocks to the driver
        # so that one trial's allocations do not starve the next one.
        del model, optimizer, scheduler
        torch.cuda.empty_cache()

    if not val_history:
        # No epoch produced a validation score; treat like a failed trial.
        return 0

    return val_history[-1]


# Maximise the objective over 100 trials.  The PatientPruner wraps no inner
# pruner (None) and is configured with patience=4 and min_delta=0.01.
study = optuna.create_study(
    direction="maximize",
    pruner=optuna.pruners.PatientPruner(
        wrapped_pruner=None,
        patience=4,
        min_delta=0.01,
    ),
)
study.optimize(objective, n_trials=100)

In [None]:
#model = VisualBertForPreTraining.from_pretrained('uclanlp/visualbert-nlvr2-coco-pre') # this checkpoint has 1024 dimensional visual embeddings projection

In [None]:
#for param in model.parameters():
#    param.requires_grad = False

In [None]:
#model

In [None]:
#model.cls.predictions.decoder = torch.nn.Linear(in_features=768, out_features=2, bias=True)

In [None]:
#model = model.to(device)

In [None]:
#for name, param in model.named_parameters():
#    if param.requires_grad == True:
#        print(name)

In [None]:
# Smoke test: push one validation batch through the model and look at the shape
# of the logits after summing over axis 1.  Nothing is trained here, so the
# forward pass runs under no_grad to avoid building an autograd graph.
# The first tuple element is named `sample_ids` so the builtin `id` is not
# shadowed at notebook scope.
with torch.no_grad():
    for sample_ids, visual_embeds, text, label in DataLoader(features_val_dataset, batch_size=9):

        visual_embeds = visual_embeds.to(device)
        tokens = tokenizer(list(text), padding='max_length', max_length=77)

        input_ids = torch.tensor(tokens["input_ids"], device=device)
        attention_mask = torch.tensor(tokens["attention_mask"], device=device)
        token_type_ids = torch.tensor(tokens["token_type_ids"], device=device)

        # One mask / type-id entry per visual embedding vector (all ones).
        visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)
        visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)

        outputs = model(input_ids=input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        visual_embeds=visual_embeds,
                        visual_attention_mask=visual_attention_mask,
                        visual_token_type_ids=visual_token_type_ids
                        )

        prediction = outputs.prediction_logits.sum(axis=1)
        break  # a single batch is enough for the shape check

prediction.shape

In [None]:
# Sum `prediction` over dimension 1 and display the shape of the result.
prediction.sum(dim=1).shape

In [None]:
# Time one call of compute_accuracy on the validation loader and print the
# elapsed seconds (rounded to 2 decimals) together with the accuracy.
t1 = time.time()
val_accuracy = compute_accuracy(
    model,
    DataLoader(features_val_dataset, batch_size=500),
    device,
)
elapsed = round(time.time() - t1, 2)
print("%f sec; Val accuracy: %f" % (elapsed, val_accuracy))

In [None]:
# Always-failing assertion: raises AssertionError when this cell is executed.
# NOTE(review): presumably a deliberate stop so that "Run All" does not continue
# into the training cells below -- confirm before removing.
assert False

In [None]:
# Number of parameter tensors yielded by the model (not the number of scalars).
sum(1 for _ in model.parameters())

In [None]:
# Manual training run with fixed hyper-parameters: cross-entropy loss, Adam
# (lr=1e-3) over the parameters that still require gradients, and a StepLR
# schedule that multiplies the learning rate by 0.8 every 2 epochs.
loss = torch.nn.CrossEntropyLoss()

params = [param for param in model.parameters() if param.requires_grad]

optimizer = torch.optim.Adam(params, lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.8)

train_loader = DataLoader(features_train_dataset, batch_size=500)
val_loader = DataLoader(features_val_dataset, batch_size=500)

# 50 is the number of epochs passed to train_model.
loss_history, train_history, val_history, best_model_name = train_model(
    model, train_loader, val_loader, loss, optimizer, 50, scheduler, device)
print('end!')


In [None]:
# Draw the loss and both accuracy histories on a single pair of axes.
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_xlabel("#iteration")
ax.set_ylabel("loss")
for series, caption in (
    (loss_history, 'loss'),
    (train_history, 'train accuracy'),
    (val_history, 'val accuracy'),
):
    ax.plot(series, label=caption)
fig.legend()
plt.show()

In [None]:
print("best model:", best_model_name)

# Open the checkpoint in a context manager so the file handle is closed once
# loading finishes (a bare open() passed to torch.load is never closed).
# NOTE: torch.load unpickles its input -- only load checkpoints you trust.
with open(best_model_name, 'rb') as checkpoint_file:
    best_model = torch.load(checkpoint_file)
print(best_model)

In [None]:
# Evaluate `best_model` on the validation data: accuracy, ROC AUC and a ROC
# curve plotted against an all-zero-score baseline.
# NOTE(review): `labels_val`, `accuracy_score`, `roc_auc_score` and `roc_curve`
# are not defined in the visible part of the notebook -- confirm they exist.
best_model.eval()
# NOTE(review): FeaturesDataset.__getitem__ returns four items, so if
# features_val_dataset is a FeaturesDataset the (x, y) unpacking below would
# fail -- this cell looks written for an older two-item dataset; confirm.
# `prediction` is overwritten on every iteration, so only the last batch is kept.
for i_step, (x, y) in enumerate(DataLoader(features_val_dataset, batch_size=5000)):
    prediction = best_model(x)

# Predicted class = index of the larger value along dimension 1.
acc_score = accuracy_score(np.array([x.item() for x in labels_val]), torch.max(prediction, 1)[1])
auc_score = roc_auc_score(np.array([x.item() for x in labels_val]), prediction[:,1].detach().numpy())

# ROC curve of the model, using column 1 of `prediction` as the score.
fpr, tpr, thresh = roc_curve(labels_val, prediction[:,1].detach().numpy(), pos_label=1)

# Baseline curve: every sample gets the same score of 0.
random_probs = [0 for i in range(len(labels_val))]
p_fpr, p_tpr, _ = roc_curve(labels_val, random_probs, pos_label=1)
# Second assignment of auc_score; it replaces the value computed above.
auc_score = roc_auc_score(labels_val, prediction[:,1].detach().numpy())

print('Accuracy: ', acc_score, '\n', 'ROC AUC: ', auc_score, sep='')

# Orange: model ROC curve; blue: baseline.
plt.plot(fpr, tpr, linestyle='--',color='orange')
plt.plot(p_fpr, p_tpr, linestyle='--', color='blue')
plt.title('ROC Curve', fontsize=20)
plt.xlabel('False Positive Rate', fontsize=18)
plt.ylabel('True Positive rate',fontsize=18)

plt.show();

In [None]:
# Always-failing assertion: raises AssertionError when this cell is executed.
# NOTE(review): presumably a deliberate stop so that "Run All" does not continue
# into the output-dumping cells below -- confirm before removing.
assert False

In [None]:
# Run the model on every validation sample and dump the flattened logits to
# disk in pickle files of up to 50 samples each.
outputs_val = {}

# Inference only: no autograd graph is needed.
with torch.no_grad():
    for i, k in enumerate(tqdm(visual_embeddings_val)):
        # Build the inputs for THIS sample.  Previously the loop reused whatever
        # tensors an earlier cell had left in the notebook globals, so every
        # key was stored with the same output.
        visual_embeds = visual_embeddings_val[k][0].to(device)
        text = val_dict[k]['text']

        tokens = tokenizer([text], padding='max_length', max_length=77)

        input_ids = torch.tensor(tokens["input_ids"], device=device)
        attention_mask = torch.tensor(tokens["attention_mask"], device=device)
        token_type_ids = torch.tensor(tokens["token_type_ids"], device=device)

        # Add a leading batch dimension of size 1.
        visual_embeds = torch.stack([visual_embeds])
        visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)
        visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)

        outputs = model(input_ids=input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        visual_embeds=visual_embeds,
                        visual_attention_mask=visual_attention_mask,
                        visual_token_type_ids=visual_token_type_ids)

        # Store a detached float16 CPU copy to keep the pickles small.
        o = outputs.prediction_logits.detach().cpu().flatten().to(torch.float16)
        outputs_val[k] = o

        # Flush every 50 samples; the file name carries the index of the last
        # sample in the chunk.
        if (i + 1) % 50 == 0:
            with open(f'd:\\val\\outputs_val_{i}.pkl', 'wb') as f:
                pickle.dump(outputs_val, f)
            outputs_val = {}

# Write the final, partially filled chunk (if any).
if len(outputs_val) > 0:
    with open(f'd:\\val\\outputs_val_{i}.pkl', 'wb') as f:
        pickle.dump(outputs_val, f)

In [None]:
#assert False

In [None]:
# Reload the cached training embeddings, keep label and text for every training
# row whose image key has an embedding, and persist that mapping.
# NOTE: pickle.load executes arbitrary code from the file -- trusted files only.
with open('d:\\visual_embeddings_train.pkl', 'rb') as f:
    visual_embeddings_train = pickle.load(f)

# Row layout used here: row[1] is the lookup key, row[2] the label, row[3] the text.
train_dict = {
    row[1]: {'label': row[2], 'text': row[3]}
    for row in train_data.values
    if row[1] in visual_embeddings_train
}

with open('d:\\train\\labels_train.pkl', 'wb') as f:
    pickle.dump(train_dict, f)

In [None]:
# Run the model on every training sample and dump the flattened logits to disk
# in pickle files of up to 50 samples each.
outputs_train = {}

# Inference only: no autograd graph is needed.
with torch.no_grad():
    for i, k in enumerate(tqdm(visual_embeddings_train)):
        visual_embeds = visual_embeddings_train[k][0].to(device)
        text = train_dict[k]['text']

        tokens = tokenizer([text], padding='max_length', max_length=77)

        input_ids = torch.tensor(tokens["input_ids"], device=device)
        attention_mask = torch.tensor(tokens["attention_mask"], device=device)
        token_type_ids = torch.tensor(tokens["token_type_ids"], device=device)

        # Add a leading batch dimension of size 1.
        visual_embeds = torch.stack([visual_embeds])
        visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)
        visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long, device=device)

        outputs = model(input_ids=input_ids,
                        attention_mask=attention_mask,
                        token_type_ids=token_type_ids,
                        visual_embeds=visual_embeds,
                        visual_attention_mask=visual_attention_mask,
                        visual_token_type_ids=visual_token_type_ids)

        # Store a detached float16 CPU copy; converting with .to() avoids
        # copy-constructing a tensor from a tensor via torch.tensor().
        o = outputs.prediction_logits.detach().cpu().flatten().to(torch.float16)
        outputs_train[k] = o

        # Flush every 50 samples; the file name carries the index of the last
        # sample in the chunk.
        if (i + 1) % 50 == 0:
            with open(f'd:\\train\\outputs_train_{i}.pkl', 'wb') as f:
                pickle.dump(outputs_train, f)
            outputs_train = {}

# Write the final, partially filled chunk (if any).
if len(outputs_train) > 0:
    with open(f'd:\\train\\outputs_train_{i}.pkl', 'wb') as f:
        pickle.dump(outputs_train, f)