In [1]:
from TinyImageNetLoader import TinyImageNetDataset
import torchvision.transforms as transforms
import torch

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize
])

valset = TinyImageNetDataset("/datasets/tiny-imagenet-200", mode="val", transform=val_transform)
#print(next(enumerate(validation_loader)))

train_transform =  transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(0.5),
    normalize
])

trainset = TinyImageNetDataset("/datasets/tiny-imagenet-200", transform=train_transform)

Preloading val data...: 100%|██████████| 10000/10000 [01:10<00:00, 142.22it/s]
Preloading train data...: 100%|██████████| 100000/100000 [11:33<00:00, 144.25it/s]


In [2]:
validation_loader = torch.utils.data.DataLoader(
        valset, batch_size=256, shuffle=False, num_workers=4)
training_loader = torch.utils.data.DataLoader(
        trainset, batch_size=256, shuffle=True, num_workers=4)

In [3]:
import torch.nn as nn
eps=1e-10
def loss_fn(out, labels, predicate_matrix):
    out = out.view(-1, 1, NUM_FEATURES) # out is a batch of 1D binary vectors
    ANDed = out * predicate_matrix # AND operation
    diff = ANDed - out # Difference of ANDed and out => if equal, then out is a subset of its class' predicates

    entr_loss = torch.nn.CrossEntropyLoss()
    loss_cl = entr_loss(diff.sum(dim=2), labels) # Is "out" a subset of its class' predicates?

    batch_size = out.shape[0]

    out = out.view(-1, NUM_FEATURES)
    diff_square = (out - predicate_matrix[labels]).pow(2)
    
    false_positives = (out - predicate_matrix[labels] + diff_square).sum() / batch_size
    missing_attr = (predicate_matrix[labels] - out + diff_square).sum() / batch_size
    
    loss_ft = (1 + false_positives + missing_attr)
    loss_ft *= loss_cl.item()/(loss_ft.item() + eps)
    
    return loss_cl + loss_ft * FT_WEIGHT

In [4]:
def train_one_epoch(scheduler):
    running_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(training_loader):
        # Every data instance is an input + label pair
        inputs, labels = data["images"], data["labels"]
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs, commit_loss, predicate_matrix = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels, predicate_matrix) + commit_loss
        loss.backward()

        # Adjust learning weights
        optimizer.step()
        
        if scheduler is not None:
            scheduler.step()

        # Gather data and report
        running_loss += loss.item()

    return running_loss / (i+1)

In [9]:
from torchmetrics import Accuracy
import sys, os
sys.path.insert(0, "/".join(os.path.abspath('').split("/")[:-1]) + "/models")
print("/".join(os.path.abspath('').split("/")[:-1]) + "/models")
from ResnetAutoPredicates import ResExtr

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

NUM_FEATURES = 64
NUM_CLASSES = 200
EPOCHS = 70
accuracy = Accuracy(task="multiclass", num_classes=NUM_CLASSES, top_k=1).to(device)

FT_WEIGHT = 0.7

model = ResExtr(NUM_FEATURES, NUM_CLASSES, pretrained=True).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=2e-4, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, 5e-4, epochs=EPOCHS, steps_per_epoch=len(training_loader))

best_stats = {
    "epoch": 0,
    "train_loss": 0,
    "val_loss": 0,
    "val_acc": 0,
    "fp": 0,
    "ma": 0,
    "oa": 0
}

from tqdm import tqdm

pbar = tqdm(range(EPOCHS))

for epoch in pbar:
    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(scheduler)

    running_vloss = 0.0
    # Set the model to evaluation mode, disabling dropout and using population
    # statistics for batch normalization.
    model.eval()
    running_acc = 0.0
    running_false_positives = 0.0
    running_missing_attr = 0.0
    running_out_attributes= 0.0

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        for i, vdata in enumerate(validation_loader):
            vinputs, vlabels = vdata["images"], vdata["labels"]
            vinputs = vinputs.to(device)
            vlabels = vlabels.to(device)
            voutputs, vcommit_loss, predicate_matrix = model(vinputs)
            vloss = loss_fn(voutputs, vlabels, predicate_matrix) + vcommit_loss
            running_vloss += vloss.item()
            voutputs = voutputs.view(-1, 1, NUM_FEATURES)
            ANDed = voutputs * predicate_matrix
            diff = ANDed - voutputs
            running_acc += accuracy(diff.sum(dim=2), vlabels)
            voutputs = voutputs.view(-1, NUM_FEATURES)
            running_false_positives += ((predicate_matrix[vlabels] - voutputs) == -1).sum() / voutputs.shape[0]
            running_missing_attr += ((voutputs - predicate_matrix[vlabels]) == -1).sum() / voutputs.shape[0]
            running_out_attributes += voutputs.sum() / voutputs.shape[0]

    avg_vloss = running_vloss / (i + 1)
    avg_acc = running_acc / (i + 1)
    avg_fp = running_false_positives / (i + 1)
    avg_ma = running_missing_attr / (i + 1)
    avg_oa = running_out_attributes / (i + 1)
    pbar.set_postfix_str(f"LOSS: {avg_vloss}, ACC: {avg_acc}, FP: {avg_fp}, MA: {avg_ma}, OA: {avg_oa}")
    #print(model.bin_quantize._codebook.embed)
    
    with open("TINRes18AutoPredData.csv", "a") as f:
        f.write(f"{epoch}, {avg_loss}, {avg_vloss}, {avg_acc}, {avg_fp}, {avg_ma}, {avg_oa}\n")

    if best_stats["val_acc"] < avg_acc:
        best_stats["epoch"] = epoch
        best_stats["train_loss"] = avg_loss
        best_stats["val_loss"] = avg_vloss
        best_stats["val_acc"] = avg_acc.item()
        best_stats["fp"] = avg_fp.item()
        best_stats["ma"] = avg_ma.item()
        best_stats["oa"] = avg_oa.item()


print(best_stats)

/notebooks/Concept_ZSL/src/models
Device: cuda


100%|██████████| 70/70 [18:18<00:00, 15.69s/it, LOSS: 5.009602475166321, ACC: 0.4854492247104645, FP: 4.552734375, MA: 3.5965821743011475, OA: 11.45127010345459]        

{'epoch': 36, 'train_loss': 2.1793006579284473, 'val_loss': 4.711245334148407, 'val_acc': 0.4901367127895355, 'fp': 4.1708984375, 'ma': 3.57763671875, 'oa': 10.9697265625}





# Resnet Baseline

In [7]:
def train_one_epoch_baseline():
    running_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(training_loader):
        # Every data instance is an input + label pair
        inputs, labels = data["images"], data["labels"]
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn_baseline(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()

    return running_loss / (i+1)

In [13]:
from torchmetrics import Accuracy
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

NUM_CLASSES = 200
EPOCHS = 15
accuracy = Accuracy(task="multiclass", num_classes=NUM_CLASSES, top_k=1).to(device)

POS_FT_WEIGHT = 0
FT_WEIGHT = 0

from torchvision.models import resnet18, ResNet18_Weights
model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(512, NUM_CLASSES)

model = model.to(device)

loss_fn_baseline = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=2e-4, weight_decay=1e-5)

best_stats = {
    "epoch": 0,
    "train_loss": 0,
    "val_loss": 0,
    "val_acc": 0,
}

from tqdm import tqdm
for epoch in tqdm(range(EPOCHS)):
    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch_baseline()

    running_vloss = 0.0
    # Set the model to evaluation mode, disabling dropout and using population
    # statistics for batch normalization.
    model.eval()
    running_acc = 0.0
    running_false_positives = 0.0

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
        for i, vdata in enumerate(validation_loader):
            vinputs, vlabels = vdata["images"], vdata["labels"]
            vinputs = vinputs.to(device)
            vlabels = vlabels.to(device)
            voutputs = model(vinputs)
            vloss = loss_fn_baseline(voutputs, vlabels)
            running_vloss += vloss.item()
            running_acc += accuracy(voutputs, vlabels)

    avg_vloss = running_vloss / (i + 1)
    avg_acc = running_acc / (i + 1)
    print(f"LOSS: {avg_vloss}, ACC: {avg_acc}")

    if best_stats["val_acc"] < avg_acc:
        best_stats["epoch"] = epoch
        best_stats["train_loss"] = avg_loss
        best_stats["val_loss"] = avg_vloss
        best_stats["val_acc"] = avg_acc.item()

print(best_stats)

Device: cuda


  7%|▋         | 1/15 [00:15<03:32, 15.20s/it]

LOSS: 2.7538333177566527, ACC: 0.3666932284832001


 13%|█▎        | 2/15 [00:30<03:16, 15.14s/it]

LOSS: 2.268373465538025, ACC: 0.45415934920310974


 20%|██        | 3/15 [00:45<03:02, 15.21s/it]

LOSS: 2.1400376319885255, ACC: 0.4812440574169159


 27%|██▋       | 4/15 [01:01<02:48, 15.34s/it]

LOSS: 1.9924053430557251, ACC: 0.5112384557723999


 33%|███▎      | 5/15 [01:16<02:33, 15.37s/it]

LOSS: 1.9608011841773987, ACC: 0.5201749801635742


 40%|████      | 6/15 [01:31<02:18, 15.38s/it]

LOSS: 1.9408017992973328, ACC: 0.5298649072647095


 47%|████▋     | 7/15 [01:47<02:04, 15.54s/it]

LOSS: 1.9406044483184814, ACC: 0.5277164578437805


 53%|█████▎    | 8/15 [02:02<01:47, 15.40s/it]

LOSS: 1.9414733171463012, ACC: 0.5363759398460388


 60%|██████    | 9/15 [02:18<01:32, 15.38s/it]

LOSS: 1.9837289929389954, ACC: 0.5289102792739868


 67%|██████▋   | 10/15 [02:33<01:16, 15.33s/it]

LOSS: 2.0138878226280212, ACC: 0.5291215181350708


 73%|███████▎  | 11/15 [02:48<01:01, 15.31s/it]

LOSS: 2.0528515219688415, ACC: 0.5269291996955872


 80%|████████  | 12/15 [03:03<00:45, 15.20s/it]

LOSS: 2.074764096736908, ACC: 0.5255321860313416


 87%|████████▋ | 13/15 [03:18<00:30, 15.17s/it]

LOSS: 2.1348794221878054, ACC: 0.5197624564170837


 93%|█████████▎| 14/15 [03:33<00:15, 15.18s/it]

LOSS: 2.1823304891586304, ACC: 0.523909866809845


100%|██████████| 15/15 [03:48<00:00, 15.26s/it]

LOSS: 2.1891473531723022, ACC: 0.528218686580658
{'epoch': 7, 'train_loss': 1.1221286131411183, 'val_loss': 1.9414733171463012, 'val_acc': 0.5363759398460388}





# Optuna

In [12]:
import torch.nn as nn

eps = 1e-10
def loss_fn_optuna(out, labels, predicate_matrix, NUM_FEATURES, FT_WEIGHT):
    out = out.view(-1, 1, NUM_FEATURES) # out is a batch of 1D binary vectors
    ANDed = out * predicate_matrix # AND operation
    diff = ANDed - out # Difference of ANDed and out => if equal, then out is a subset of its class' predicates

    entr_loss = nn.CrossEntropyLoss()
    loss_cl = entr_loss(diff.sum(dim=2), labels) # Is "out" a subset of its class' predicates?

    batch_size = out.shape[0]

    out = out.view(-1, NUM_FEATURES)
    diff_square = (out - predicate_matrix[labels]).pow(2)
    
    false_positives = (out - predicate_matrix[labels] + diff_square).sum() / batch_size
    missing_attr = (predicate_matrix[labels] - out + diff_square).sum() / batch_size
    
    loss_ft = (1 + false_positives + missing_attr)
    loss_ft *= loss_cl.item()/(loss_ft.item() + eps)
    
    return loss_cl + loss_ft * FT_WEIGHT

from torchmetrics import Accuracy
def train_one_epoch_optuna(model, optimizer, scheduler, NUM_FEATURES, FT_WEIGHT):
    running_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(training_loader):
        # Every data instance is an input + label pair
        inputs, labels = data["images"], data["labels"]
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs, commit_loss, predicate_matrix = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn_optuna(outputs, labels, predicate_matrix, NUM_FEATURES, FT_WEIGHT) + commit_loss
        loss.backward()

        # Adjust learning weights
        optimizer.step()
        
        scheduler.step()

        # Gather data and report
        running_loss += loss.item()

    return running_loss / (i+1)

from tqdm import tqdm
from torch import optim

import sys, os
sys.path.insert(0, "/".join(os.path.abspath('').split("/")[:-1]) + "/models")
print("/".join(os.path.abspath('').split("/")[:-1]) + "/models")
from ResnetAutoPredicates import ResExtr

def objective(trial):
    global trial_num
    trial_num += 1
    print(f"Starting trial {trial_num}")
    NUM_FEATURES = trial.suggest_int("num_features", 4, 16)
    FT_WEIGHT = trial.suggest_float("ft_weight", 0, 1)
    # Generate the model.
    model = ResExtr(NUM_FEATURES*8, NUM_CLASSES, pretrained=True).to(device)

    EPOCHS = 50

    # Generate the optimizers.
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    max_lr = trial.suggest_float("max_lr", 1e-4, 4e-3, log=True)
    
    if lr > max_lr:
        raise optuna.TrialPruned()
    
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=EPOCHS, steps_per_epoch=len(training_loader))

    best_acc = 0.0

    pbar = tqdm(range(EPOCHS))
    for epoch in pbar:
        # Make sure gradient tracking is on, and do a pass over the data
        model.train(True)
        _ = train_one_epoch_optuna(model, optimizer, scheduler, NUM_FEATURES*8, FT_WEIGHT)

        model.eval()
        running_acc = 0.0

        # Disable gradient computation and reduce memory consumption.
        with torch.no_grad():
            for i, vdata in enumerate(validation_loader):
                vinputs, vlabels = vdata["images"], vdata["labels"]
                vinputs = vinputs.to(device)
                vlabels = vlabels.to(device)
                voutputs, _, predicate_matrix = model(vinputs)
                voutputs = voutputs.view(-1, 1, NUM_FEATURES*8)
                ANDed = voutputs * predicate_matrix
                diff = ANDed - voutputs
                running_acc += accuracy(diff.sum(dim=2), vlabels)

        avg_acc = running_acc / (i + 1)
        pbar.set_postfix_str(f"ACC: {avg_acc}")

        if avg_acc > best_acc:
            best_acc = avg_acc

        if epoch == 30 and best_acc < 0.3:
            raise optuna.TrialPruned()
        elif epoch == 15 and best_acc < 0.1:
            raise optuna.TrialPruned()
    
    return best_acc

/notebooks/Concept_ZSL/src/models


In [13]:
import optuna

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")

NUM_CLASSES = 200
accuracy = Accuracy(task="multiclass", num_classes=NUM_CLASSES, top_k=1).to(device)

trial_num = -1

study = optuna.create_study(direction="maximize", study_name='TIN-ResNet18-AutoPred50', load_if_exists=True, storage='sqlite:///Optuna.db')
study.optimize(objective, n_trials=100)

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    if key == "num_features":
        print("    {}: {}".format(key, value*8))
    else:
        print("    {}: {}".format(key, value))

[I 2023-10-13 13:34:48,235] Using an existing study with name 'TIN-ResNet18-AutoPred50' instead of creating a new one.


Device: cuda
Starting trial 0


100%|██████████| 50/50 [12:49<00:00, 15.40s/it, ACC: 0.508593738079071]  
[I 2023-10-13 13:47:38,608] Trial 1 finished with value: 0.510546863079071 and parameters: {'num_features': 11, 'ft_weight': 0.21935673023924307, 'lr': 1.7167562294364418e-05, 'max_lr': 0.0011501295319336989}. Best is trial 1 with value: 0.510546863079071.


Starting trial 1


[I 2023-10-13 13:47:39,081] Trial 2 pruned. 


Starting trial 2


100%|██████████| 50/50 [12:44<00:00, 15.29s/it, ACC: 0.49853515625]      
[I 2023-10-13 14:00:24,311] Trial 3 finished with value: 0.49951171875 and parameters: {'num_features': 14, 'ft_weight': 0.7939863651996772, 'lr': 7.690743109731049e-05, 'max_lr': 0.00013688128691750003}. Best is trial 1 with value: 0.510546863079071.


Starting trial 3


100%|██████████| 50/50 [12:42<00:00, 15.26s/it, ACC: 0.4931640625]       
[I 2023-10-13 14:13:07,857] Trial 4 finished with value: 0.4951171875 and parameters: {'num_features': 14, 'ft_weight': 0.8611505723866981, 'lr': 3.056886218349674e-05, 'max_lr': 0.00046599358804611283}. Best is trial 1 with value: 0.510546863079071.


Starting trial 4


100%|██████████| 50/50 [12:42<00:00, 15.25s/it, ACC: 0.48320314288139343]
[I 2023-10-13 14:25:50,503] Trial 5 finished with value: 0.48613283038139343 and parameters: {'num_features': 6, 'ft_weight': 0.2976142931260036, 'lr': 0.0001119688645396934, 'max_lr': 0.0027770232749718704}. Best is trial 1 with value: 0.510546863079071.


Starting trial 5


100%|██████████| 50/50 [12:38<00:00, 15.17s/it, ACC: 0.515917956829071]  
[I 2023-10-13 14:38:29,860] Trial 6 finished with value: 0.51806640625 and parameters: {'num_features': 7, 'ft_weight': 0.18995618534649383, 'lr': 6.4876288607133e-05, 'max_lr': 0.00037619695818120666}. Best is trial 6 with value: 0.51806640625.


Starting trial 6


100%|██████████| 50/50 [12:50<00:00, 15.40s/it, ACC: 0.49531251192092896]
[I 2023-10-13 14:51:20,719] Trial 7 finished with value: 0.49531251192092896 and parameters: {'num_features': 14, 'ft_weight': 0.5772691618290632, 'lr': 1.1310082967073951e-05, 'max_lr': 0.001050714067671693}. Best is trial 6 with value: 0.51806640625.


Starting trial 7


100%|██████████| 50/50 [12:46<00:00, 15.34s/it, ACC: 0.50634765625]      
[I 2023-10-13 15:04:08,047] Trial 8 finished with value: 0.5088867545127869 and parameters: {'num_features': 13, 'ft_weight': 0.3674204716276448, 'lr': 4.136947072409993e-05, 'max_lr': 0.0007749745820584464}. Best is trial 6 with value: 0.51806640625.


Starting trial 8


100%|██████████| 50/50 [12:46<00:00, 15.33s/it, ACC: 0.4986328184604645] 
[I 2023-10-13 15:16:55,224] Trial 9 finished with value: 0.49931642413139343 and parameters: {'num_features': 12, 'ft_weight': 0.7524542413775797, 'lr': 2.2172571202409232e-05, 'max_lr': 0.000570021835799505}. Best is trial 6 with value: 0.51806640625.


Starting trial 9


[I 2023-10-13 15:16:55,723] Trial 10 pruned. 


Starting trial 10


100%|██████████| 50/50 [12:47<00:00, 15.35s/it, ACC: 0.512402355670929]  
[I 2023-10-13 15:29:43,894] Trial 11 finished with value: 0.5140625238418579 and parameters: {'num_features': 8, 'ft_weight': 0.0044688007772815674, 'lr': 0.0001473944606781968, 'max_lr': 0.0003139445734931126}. Best is trial 6 with value: 0.51806640625.


Starting trial 11


100%|██████████| 50/50 [12:43<00:00, 15.26s/it, ACC: 0.5106445550918579] 
[I 2023-10-13 15:42:27,839] Trial 12 finished with value: 0.5106445550918579 and parameters: {'num_features': 8, 'ft_weight': 0.02535873641539843, 'lr': 0.00017378868618930874, 'max_lr': 0.0003016290570968002}. Best is trial 6 with value: 0.51806640625.


Starting trial 12


100%|██████████| 50/50 [12:37<00:00, 15.14s/it, ACC: 0.51611328125]      
[I 2023-10-13 15:55:05,647] Trial 13 finished with value: 0.517285168170929 and parameters: {'num_features': 9, 'ft_weight': 0.0012140908688575364, 'lr': 0.00024550936478905994, 'max_lr': 0.0003108933933457271}. Best is trial 6 with value: 0.51806640625.


Starting trial 13


[I 2023-10-13 15:55:06,089] Trial 14 pruned. 


Starting trial 14


100%|██████████| 50/50 [12:45<00:00, 15.32s/it, ACC: 0.513867199420929]  
[I 2023-10-13 16:07:52,537] Trial 15 finished with value: 0.517578125 and parameters: {'num_features': 9, 'ft_weight': 0.16430276672639066, 'lr': 5.428315605446366e-05, 'max_lr': 0.00040553819511015583}. Best is trial 6 with value: 0.51806640625.


Starting trial 15


100%|██████████| 50/50 [12:50<00:00, 15.41s/it, ACC: 0.5140625238418579] 
[I 2023-10-13 16:20:43,455] Trial 16 finished with value: 0.5140625238418579 and parameters: {'num_features': 16, 'ft_weight': 0.41937230638263145, 'lr': 6.48854361675037e-05, 'max_lr': 0.00047549171165264857}. Best is trial 6 with value: 0.51806640625.


Starting trial 16


100%|██████████| 50/50 [12:39<00:00, 15.19s/it, ACC: 0.48359376192092896]
[I 2023-10-13 16:33:23,420] Trial 17 finished with value: 0.49091798067092896 and parameters: {'num_features': 6, 'ft_weight': 0.19221798736081427, 'lr': 5.064141205111694e-05, 'max_lr': 0.00010024112056060755}. Best is trial 6 with value: 0.51806640625.


Starting trial 17


100%|██████████| 50/50 [12:37<00:00, 15.15s/it, ACC: 0.5069336295127869] 
[I 2023-10-13 16:46:01,644] Trial 18 finished with value: 0.509765625 and parameters: {'num_features': 10, 'ft_weight': 0.2814229714259377, 'lr': 7.156104300564866e-05, 'max_lr': 0.00019658724586958486}. Best is trial 6 with value: 0.51806640625.


Starting trial 18


100%|██████████| 50/50 [12:35<00:00, 15.10s/it, ACC: 0.48515626788139343]
[I 2023-10-13 16:58:37,328] Trial 19 finished with value: 0.4864257872104645 and parameters: {'num_features': 4, 'ft_weight': 0.4929967805053706, 'lr': 3.395796582636252e-05, 'max_lr': 0.0007525776321807799}. Best is trial 6 with value: 0.51806640625.


Starting trial 19


100%|██████████| 50/50 [13:09<00:00, 15.78s/it, ACC: 0.5171875357627869] 
[I 2023-10-13 17:11:47,060] Trial 20 finished with value: 0.5185546875 and parameters: {'num_features': 8, 'ft_weight': 0.1411101232079125, 'lr': 0.00010794944363649746, 'max_lr': 0.0004323540810907836}. Best is trial 20 with value: 0.5185546875.


Starting trial 20


100%|██████████| 50/50 [12:35<00:00, 15.12s/it, ACC: 0.5166992545127869] 
[I 2023-10-13 17:24:23,444] Trial 21 finished with value: 0.518261730670929 and parameters: {'num_features': 7, 'ft_weight': 0.10396439217336619, 'lr': 0.00010528689766442166, 'max_lr': 0.0006624848455169455}. Best is trial 20 with value: 0.5185546875.


Starting trial 21


  0%|          | 0/50 [00:10<?, ?it/s]
[W 2023-10-13 17:24:34,350] Trial 22 failed with parameters: {'num_features': 7, 'ft_weight': 0.1087614224956118, 'lr': 0.00010038223252236046, 'max_lr': 0.0006095011915820249} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_87/1434010226.py", line 93, in objective
    _ = train_one_epoch_optuna(model, optimizer, scheduler, NUM_FEATURES*8, FT_WEIGHT)
  File "/tmp/ipykernel_87/1434010226.py", line 42, in train_one_epoch_optuna
    outputs, commit_loss, predicate_matrix = model(inputs)
  File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_cal

KeyboardInterrupt: 