# DESCRIPTION
Experiment with MIT's AST (Audio Spectrogram Transformer) for UAV Classification. 

# NOTES
- add inference metric to sweep loop
- change schedular
- more idk

In [2]:
from AST_helper.util import AudioDataset, train_test_split_custom, save_model
from AST_helper.engine import sweep_train, inference_loop
from AST_helper.model import auto_extractor, custom_AST
from AST_helper.util import save_model # noqa: F401

import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
from torchinfo import summary

import wandb

device = "cuda" if torch.cuda.is_available() else "cpu"
display(device)

'cuda'

In [3]:
data_path = "C:/Users/Sidewinders/Research_notebooks/Drone_classification/Research/UAV_Dataset_9"
model_name = "MIT/ast-finetuned-audioset-10-10-0.4593"
BATCH_SIZE = 16
SEED = 42
NUM_CUDA_WORKERS = 0
NUM_CLASSES =  9 
EPOCHS = 7
PINNED_MEMORY = True
SHUFFLED = True
ACCUMULATION_STEPS = 3 # multiplies by batch size for large batch size effect.
SAVE_MODEL = False
PROJECT_NAME = "AST_Sweeps"

torch.cuda.empty_cache()
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [4]:
sweep_config = {
    "name": "Schedular",
    "method": "random",
    "metric": {"goal": "maximize", "name": "test_acc"},
    "parameters": {
        "learning_rate": {"distribution":"uniform","min": 0.0001, "max": 0.001},
        "batch_size": {"values": [4,8,16,32]},
        "epochs" : {"values" : [7]},
        "optimizer" : {"values" : ["adamW"]},
        "scheduler" : {"values" : ["PolynomialLR"]}

        }    
}
sweep_id = wandb.sweep(sweep_config, project=PROJECT_NAME)
sweep_count = 100


wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: ange3go8
Sweep URL: https://wandb.ai/andberg9-self/AST_Sweeps/sweeps/ange3go8


In [5]:
def make(config):
    # Make the data
    feature_extractor = auto_extractor(model_name)

    dataset = AudioDataset(data_path, feature_extractor)
    train_subset, test_subset = train_test_split_custom(dataset, test_size=0.2)  # type: ignore


    train_loader = DataLoader(dataset=train_subset, 
                                         batch_size=config.batch_size,
                                         num_workers=NUM_CUDA_WORKERS,
                                         pin_memory=PINNED_MEMORY,
                                         shuffle=SHUFFLED)
    
    test_loader = DataLoader(dataset=test_subset,
                                        batch_size=config.batch_size, 
                                        num_workers=NUM_CUDA_WORKERS,
                                        pin_memory=PINNED_MEMORY,
                                        shuffle=SHUFFLED)
    
    # if inference_subset: # may not be defined
    #     inference_dataloader_custom = DataLoader(dataset=inference_subset,
    #                                     batch_size=config.batch_size, 
    #                                     num_workers=NUM_CUDA_WORKERS,
    #                                     pin_memory=PINNED_MEMORY,
    #                                     shuffle=SHUFFLED) 

    # Make the model
    model = custom_AST(model_name, NUM_CLASSES, device)

    # Make the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(),
                              lr=config.learning_rate)
    
    
    scheduler = optim.lr_scheduler.PolynomialLR(optimizer) 

    return model, train_loader, test_loader, criterion, optimizer, scheduler

In [6]:
def model_pipeline(config=None):

    # tell wandb to get started
    with wandb.init(config):
      # access all HPs through wandb.config, so logging matches execution!
      config = wandb.config
      # make the model, data, and optimization problem
      model, train_loader, test_loader, criterion, optimizer, scheduler = make(config)
      print(model)

      results = sweep_train(model,
                      train_dataloader=train_loader,
                      test_dataloader=test_loader,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      loss_fn=criterion,
                      epochs=config.epochs, # type: ignore
                      device=device,
                      num_classes=NUM_CLASSES
                      # patience=TRAIN_PATIENCE)
                      )
      
      inference_loop(model=model,
               device=device,
               loss_fn=criterion,
               inference_loader= train_loader)

    return model, results

In [7]:
# model,result = model_pipeline(config)
wandb.agent(sweep_id, model_pipeline, count=sweep_count)

wandb: Agent Starting Run: x8ajx6v4 with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009319810259145844
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR
wandb: Currently logged in as: andberg9 (andberg9-self). Use `wandb login --relogin` to force relogin


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

  context_layer = torch.nn.functional.scaled_dot_product_attention(


Epoch 1/7 | Train Loss: 1.9678 | Train Acc: 0.3302 | Train F1: 0.3231 | Test Loss: 1.5025 | Test Acc: 0.6573 | Test F1: 0.6406
Epoch 2/7 | Train Loss: 1.2797 | Train Acc: 0.7541 | Train F1: 0.7407 | Test Loss: 1.0364 | Test Acc: 0.8656 | Test F1: 0.8624
Epoch 3/7 | Train Loss: 0.9422 | Train Acc: 0.8356 | Train F1: 0.8370 | Test Loss: 0.8319 | Test Acc: 0.8823 | Test F1: 0.8801
Epoch 4/7 | Train Loss: 0.7778 | Train Acc: 0.8546 | Train F1: 0.8556 | Test Loss: 0.7199 | Test Acc: 0.9052 | Test F1: 0.9056
Epoch 5/7 | Train Loss: 0.6997 | Train Acc: 0.8750 | Train F1: 0.8719 | Test Loss: 0.6742 | Test Acc: 0.8948 | Test F1: 0.8949
Epoch 6/7 | Train Loss: 0.6797 | Train Acc: 0.8736 | Train F1: 0.8760 | Test Loss: 0.6760 | Test Acc: 0.9010 | Test F1: 0.8949
Epoch 7/7 | Train Loss: 0.6735 | Train Acc: 0.8791 | Train F1: 0.8760 | Test Loss: 0.6724 | Test Acc: 0.8979 | Test F1: 0.8949
Train time on cuda: 341.9821492000003


  with torch.cuda.amp.autocast():


Inference Loss: 0.6798, Accuracy: 87.64%


VBox(children=(Label(value='0.009 MB of 0.009 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇▇████
test_f1,▁▇▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▇▇█▇▇▇
test_recall,▁▇▇████
train_acc,▁▆▇████
train_f1,▁▆█████

0,1
epoch,7
inference_accuracy,0.87639
inference_loss,0.67983
test_acc,0.89792
test_f1,0.89487
test_loss,0.67239
test_precision,0.90103
test_recall,0.89444
train_acc,0.87908
train_f1,0.87599


wandb: Agent Starting Run: 3cu641k7 with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00039584770002913807
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.0258 | Train Acc: 0.2764 | Train F1: 0.2572 | Test Loss: 1.7099 | Test Acc: 0.4635 | Test F1: 0.4353
Epoch 2/7 | Train Loss: 1.4173 | Train Acc: 0.6722 | Train F1: 0.6587 | Test Loss: 1.2443 | Test Acc: 0.8073 | Test F1: 0.7835
Epoch 3/7 | Train Loss: 1.0886 | Train Acc: 0.8208 | Train F1: 0.8169 | Test Loss: 1.0320 | Test Acc: 0.8542 | Test F1: 0.8404
Epoch 4/7 | Train Loss: 0.9254 | Train Acc: 0.8625 | Train F1: 0.8612 | Test Loss: 0.9177 | Test Acc: 0.8698 | Test F1: 0.8586
Epoch 5/7 | Train Loss: 0.8471 | Train Acc: 0.8764 | Train F1: 0.8754 | Test Loss: 0.8619 | Test Acc: 0.8750 | Test F1: 0.8641
Epoch 6/7 | Train Loss: 0.8230 | Train Acc: 0.8806 | Train F1: 0.8796 | Test Loss: 0.8730 | Test Acc: 0.8750 | Test F1: 0.8641
Epoch 7/7 | Train Loss: 0.8230 | Train Acc: 0.8806 | Train F1: 0.8796 | Test Loss: 0.8630 | Test Acc: 0.8750 | Test F1: 0.8641
Train time on cuda: 333.5204988999999
Inference Loss: 0.8230, Accuracy: 87.92%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇█████
test_f1,▁▇█████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆█████
test_recall,▁▇█████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.87917
inference_loss,0.82297
test_acc,0.875
test_f1,0.86415
test_loss,0.86305
test_precision,0.87679
test_recall,0.86667
train_acc,0.88056
train_f1,0.87961


wandb: Agent Starting Run: 2msge95t with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007529002329143164
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8008 | Train Acc: 0.4497 | Train F1: 0.4118 | Test Loss: 1.5052 | Test Acc: 0.6115 | Test F1: 0.5540
Epoch 2/7 | Train Loss: 1.2801 | Train Acc: 0.7201 | Train F1: 0.7003 | Test Loss: 1.1303 | Test Acc: 0.7500 | Test F1: 0.7239
Epoch 3/7 | Train Loss: 0.9906 | Train Acc: 0.8179 | Train F1: 0.8129 | Test Loss: 0.9448 | Test Acc: 0.8125 | Test F1: 0.8082
Epoch 4/7 | Train Loss: 0.8514 | Train Acc: 0.8492 | Train F1: 0.8435 | Test Loss: 0.8470 | Test Acc: 0.8302 | Test F1: 0.8363
Epoch 5/7 | Train Loss: 0.7826 | Train Acc: 0.8668 | Train F1: 0.8645 | Test Loss: 0.8014 | Test Acc: 0.8500 | Test F1: 0.8479
Epoch 6/7 | Train Loss: 0.7598 | Train Acc: 0.8682 | Train F1: 0.8659 | Test Loss: 0.8030 | Test Acc: 0.8531 | Test F1: 0.8479
Epoch 7/7 | Train Loss: 0.7607 | Train Acc: 0.8668 | Train F1: 0.8659 | Test Loss: 0.7994 | Test Acc: 0.8563 | Test F1: 0.8479
Train time on cuda: 334.6163984
Inference Loss: 0.7569, Accuracy: 86.67%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇▇███
test_f1,▁▅▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▇████
test_recall,▁▅▇████
train_acc,▁▆▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.86667
inference_loss,0.75685
test_acc,0.85625
test_f1,0.84792
test_loss,0.7994
test_precision,0.86136
test_recall,0.85
train_acc,0.86685
train_f1,0.86595


wandb: Agent Starting Run: 24n7egxo with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007039213252775477
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8895 | Train Acc: 0.3736 | Train F1: 0.3714 | Test Loss: 1.5035 | Test Acc: 0.6802 | Test F1: 0.6723
Epoch 2/7 | Train Loss: 1.3608 | Train Acc: 0.7364 | Train F1: 0.7321 | Test Loss: 1.1796 | Test Acc: 0.8042 | Test F1: 0.7748
Epoch 3/7 | Train Loss: 1.0601 | Train Acc: 0.8111 | Train F1: 0.8026 | Test Loss: 0.9864 | Test Acc: 0.8417 | Test F1: 0.8253
Epoch 4/7 | Train Loss: 0.9016 | Train Acc: 0.8383 | Train F1: 0.8343 | Test Loss: 0.8727 | Test Acc: 0.8729 | Test F1: 0.8688
Epoch 5/7 | Train Loss: 0.8309 | Train Acc: 0.8573 | Train F1: 0.8583 | Test Loss: 0.8126 | Test Acc: 0.8875 | Test F1: 0.8762
Epoch 6/7 | Train Loss: 0.7996 | Train Acc: 0.8668 | Train F1: 0.8645 | Test Loss: 0.8343 | Test Acc: 0.8812 | Test F1: 0.8762
Epoch 7/7 | Train Loss: 0.8042 | Train Acc: 0.8641 | Train F1: 0.8645 | Test Loss: 0.8218 | Test Acc: 0.8812 | Test F1: 0.8762
Train time on cuda: 334.65038060000006
Inference Loss: 0.8055, Accuracy: 86.53%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆████
test_f1,▁▅▆████
test_loss,█▅▃▂▁▁▁
test_precision,▁▇█████
test_recall,▁▅▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.86528
inference_loss,0.80546
test_acc,0.88125
test_f1,0.87619
test_loss,0.82184
test_precision,0.89539
test_recall,0.88333
train_acc,0.86413
train_f1,0.86449


wandb: Agent Starting Run: ovvq9qei with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0004722094404645134
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1101 | Train Acc: 0.1834 | Train F1: 0.1770 | Test Loss: 1.8567 | Test Acc: 0.4052 | Test F1: 0.3860
Epoch 2/7 | Train Loss: 1.6954 | Train Acc: 0.5163 | Train F1: 0.4952 | Test Loss: 1.5568 | Test Acc: 0.6208 | Test F1: 0.6070
Epoch 3/7 | Train Loss: 1.4308 | Train Acc: 0.7133 | Train F1: 0.7006 | Test Loss: 1.3433 | Test Acc: 0.7583 | Test F1: 0.7325
Epoch 4/7 | Train Loss: 1.2765 | Train Acc: 0.7690 | Train F1: 0.7570 | Test Loss: 1.2251 | Test Acc: 0.7958 | Test F1: 0.7677
Epoch 5/7 | Train Loss: 1.1890 | Train Acc: 0.7894 | Train F1: 0.7782 | Test Loss: 1.1728 | Test Acc: 0.8177 | Test F1: 0.7903
Epoch 6/7 | Train Loss: 1.1627 | Train Acc: 0.7962 | Train F1: 0.7901 | Test Loss: 1.1774 | Test Acc: 0.8052 | Test F1: 0.7903
Epoch 7/7 | Train Loss: 1.1626 | Train Acc: 0.7976 | Train F1: 0.7901 | Test Loss: 1.1800 | Test Acc: 0.8083 | Test F1: 0.7903
Train time on cuda: 334.0690211000001
Inference Loss: 1.1603, Accuracy: 80.00%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▅▃▂▁▁▁
test_precision,▁▅▇████
test_recall,▁▅▇████
train_acc,▁▅▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.8
inference_loss,1.16028
test_acc,0.80833
test_f1,0.79031
test_loss,1.18001
test_precision,0.81443
test_recall,0.80556
train_acc,0.79755
train_f1,0.79014


wandb: Agent Starting Run: a69soro5 with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0008871236696519004
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.9959 | Train Acc: 0.2826 | Train F1: 0.2921 | Test Loss: 1.5807 | Test Acc: 0.5698 | Test F1: 0.4751
Epoch 2/7 | Train Loss: 1.3614 | Train Acc: 0.6467 | Train F1: 0.6132 | Test Loss: 1.1215 | Test Acc: 0.7427 | Test F1: 0.7024
Epoch 3/7 | Train Loss: 1.0171 | Train Acc: 0.7799 | Train F1: 0.7701 | Test Loss: 0.9241 | Test Acc: 0.7906 | Test F1: 0.7524
Epoch 4/7 | Train Loss: 0.8447 | Train Acc: 0.8247 | Train F1: 0.8171 | Test Loss: 0.8016 | Test Acc: 0.8594 | Test F1: 0.8467
Epoch 5/7 | Train Loss: 0.7755 | Train Acc: 0.8424 | Train F1: 0.8448 | Test Loss: 0.7678 | Test Acc: 0.8604 | Test F1: 0.8565
Epoch 6/7 | Train Loss: 0.7471 | Train Acc: 0.8573 | Train F1: 0.8546 | Test Loss: 0.7555 | Test Acc: 0.8667 | Test F1: 0.8565
Epoch 7/7 | Train Loss: 0.7446 | Train Acc: 0.8546 | Train F1: 0.8546 | Test Loss: 0.7727 | Test Acc: 0.8635 | Test F1: 0.8565
Train time on cuda: 334.50497829999995
Inference Loss: 0.7453, Accuracy: 85.42%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆████
test_f1,▁▅▆████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▆████
test_recall,▁▅▆████
train_acc,▁▅▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.85417
inference_loss,0.74532
test_acc,0.86354
test_f1,0.85652
test_loss,0.77267
test_precision,0.86679
test_recall,0.86111
train_acc,0.85462
train_f1,0.85464


wandb: Agent Starting Run: t6j5t9y7 with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0005462197046176492
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6112 | Train Acc: 0.5056 | Train F1: 0.4916 | Test Loss: 0.9928 | Test Acc: 0.7989 | Test F1: 0.7798
Epoch 2/7 | Train Loss: 0.7875 | Train Acc: 0.8542 | Train F1: 0.8514 | Test Loss: 0.6394 | Test Acc: 0.8750 | Test F1: 0.8770
Epoch 3/7 | Train Loss: 0.5720 | Train Acc: 0.8806 | Train F1: 0.8806 | Test Loss: 0.5179 | Test Acc: 0.8913 | Test F1: 0.8862
Epoch 4/7 | Train Loss: 0.4868 | Train Acc: 0.9014 | Train F1: 0.9014 | Test Loss: 0.4541 | Test Acc: 0.9239 | Test F1: 0.9237
Epoch 5/7 | Train Loss: 0.4482 | Train Acc: 0.9069 | Train F1: 0.9074 | Test Loss: 0.4381 | Test Acc: 0.9239 | Test F1: 0.9237
Epoch 6/7 | Train Loss: 0.4346 | Train Acc: 0.9111 | Train F1: 0.9114 | Test Loss: 0.4342 | Test Acc: 0.9239 | Test F1: 0.9237
Epoch 7/7 | Train Loss: 0.4346 | Train Acc: 0.9111 | Train F1: 0.9114 | Test Loss: 0.4345 | Test Acc: 0.9239 | Test F1: 0.9237
Train time on cuda: 331.9141902000001
Inference Loss: 0.4346, Accuracy: 91.11%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆████
test_f1,▁▆▆████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▆████
test_recall,▁▆▆████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.91111
inference_loss,0.43457
test_acc,0.92391
test_f1,0.92371
test_loss,0.43448
test_precision,0.9292
test_recall,0.92222
train_acc,0.91111
train_f1,0.91136


wandb: Agent Starting Run: 66ddjneg with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006499051553144666
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7070 | Train Acc: 0.4792 | Train F1: 0.4759 | Test Loss: 1.2071 | Test Acc: 0.7500 | Test F1: 0.7397
Epoch 2/7 | Train Loss: 0.9691 | Train Acc: 0.8236 | Train F1: 0.8220 | Test Loss: 0.7577 | Test Acc: 0.8594 | Test F1: 0.8506
Epoch 3/7 | Train Loss: 0.7027 | Train Acc: 0.8778 | Train F1: 0.8773 | Test Loss: 0.6496 | Test Acc: 0.8542 | Test F1: 0.8612
Epoch 4/7 | Train Loss: 0.5880 | Train Acc: 0.8917 | Train F1: 0.8920 | Test Loss: 0.5598 | Test Acc: 0.9010 | Test F1: 0.8965
Epoch 5/7 | Train Loss: 0.5423 | Train Acc: 0.9014 | Train F1: 0.9016 | Test Loss: 0.5354 | Test Acc: 0.8906 | Test F1: 0.9028
Epoch 6/7 | Train Loss: 0.5266 | Train Acc: 0.9056 | Train F1: 0.9059 | Test Loss: 0.5423 | Test Acc: 0.9062 | Test F1: 0.9028
Epoch 7/7 | Train Loss: 0.5266 | Train Acc: 0.9056 | Train F1: 0.9059 | Test Loss: 0.5339 | Test Acc: 0.9062 | Test F1: 0.9028
Train time on cuda: 333.73586650000016
Inference Loss: 0.5266, Accuracy: 90.56%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▆█▇██
test_f1,▁▆▆████
test_loss,█▃▂▁▁▁▁
test_precision,▁▅▆████
test_recall,▁▆▆████
train_acc,▁▇█████
train_f1,▁▇█████

0,1
epoch,7
inference_accuracy,0.90556
inference_loss,0.52659
test_acc,0.90625
test_f1,0.9028
test_loss,0.53387
test_precision,0.91311
test_recall,0.9
train_acc,0.90556
train_f1,0.90587


wandb: Agent Starting Run: de56rejo with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009869224205048266
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7232 | Train Acc: 0.4181 | Train F1: 0.4100 | Test Loss: 1.0630 | Test Acc: 0.6771 | Test F1: 0.6328
Epoch 2/7 | Train Loss: 0.7988 | Train Acc: 0.8236 | Train F1: 0.8206 | Test Loss: 0.6160 | Test Acc: 0.9167 | Test F1: 0.9091
Epoch 3/7 | Train Loss: 0.5525 | Train Acc: 0.8903 | Train F1: 0.8898 | Test Loss: 0.4884 | Test Acc: 0.9271 | Test F1: 0.9224
Epoch 4/7 | Train Loss: 0.4535 | Train Acc: 0.9056 | Train F1: 0.9050 | Test Loss: 0.4195 | Test Acc: 0.9427 | Test F1: 0.9399
Epoch 5/7 | Train Loss: 0.4139 | Train Acc: 0.9250 | Train F1: 0.9254 | Test Loss: 0.4102 | Test Acc: 0.9479 | Test F1: 0.9454
Epoch 6/7 | Train Loss: 0.4005 | Train Acc: 0.9306 | Train F1: 0.9310 | Test Loss: 0.4214 | Test Acc: 0.9479 | Test F1: 0.9454
Epoch 7/7 | Train Loss: 0.4005 | Train Acc: 0.9306 | Train F1: 0.9310 | Test Loss: 0.3971 | Test Acc: 0.9479 | Test F1: 0.9454
Train time on cuda: 333.4818471999997
Inference Loss: 0.4005, Accuracy: 93.06%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇▇████
test_f1,▁▇▇████
test_loss,█▃▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▇▇████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.93056
inference_loss,0.40052
test_acc,0.94792
test_f1,0.94544
test_loss,0.3971
test_precision,0.95271
test_recall,0.94444
train_acc,0.93056
train_f1,0.93102


wandb: Agent Starting Run: mn118stu with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0008607643859221361
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.3918 | Train Acc: 0.5653 | Train F1: 0.5631 | Test Loss: 0.7493 | Test Acc: 0.8424 | Test F1: 0.8374
Epoch 2/7 | Train Loss: 0.5521 | Train Acc: 0.8847 | Train F1: 0.8842 | Test Loss: 0.4589 | Test Acc: 0.9185 | Test F1: 0.9171
Epoch 3/7 | Train Loss: 0.4024 | Train Acc: 0.9083 | Train F1: 0.9088 | Test Loss: 0.3992 | Test Acc: 0.9076 | Test F1: 0.9123
Epoch 4/7 | Train Loss: 0.3477 | Train Acc: 0.9111 | Train F1: 0.9109 | Test Loss: 0.3352 | Test Acc: 0.9457 | Test F1: 0.9453
Epoch 5/7 | Train Loss: 0.3133 | Train Acc: 0.9278 | Train F1: 0.9281 | Test Loss: 0.3346 | Test Acc: 0.9402 | Test F1: 0.9457
Epoch 6/7 | Train Loss: 0.3015 | Train Acc: 0.9319 | Train F1: 0.9324 | Test Loss: 0.3235 | Test Acc: 0.9457 | Test F1: 0.9457
Epoch 7/7 | Train Loss: 0.3015 | Train Acc: 0.9319 | Train F1: 0.9324 | Test Loss: 0.3248 | Test Acc: 0.9457 | Test F1: 0.9457
Train time on cuda: 332.1569161999996
Inference Loss: 0.3015, Accuracy: 93.19%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▅████
test_f1,▁▆▆████
test_loss,█▃▂▁▁▁▁
test_precision,▁▆▆████
test_recall,▁▆▆████
train_acc,▁▇█████
train_f1,▁▇█████

0,1
epoch,7
inference_accuracy,0.93194
inference_loss,0.30154
test_acc,0.94565
test_f1,0.94569
test_loss,0.32484
test_precision,0.95204
test_recall,0.94444
train_acc,0.93194
train_f1,0.93238


wandb: Agent Starting Run: vz5v5xr7 with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0005186112793542219
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.2055 | Train Acc: 0.2038 | Train F1: 0.1754 | Test Loss: 1.9665 | Test Acc: 0.3271 | Test F1: 0.2912
Epoch 2/7 | Train Loss: 1.7244 | Train Acc: 0.5027 | Train F1: 0.4709 | Test Loss: 1.6382 | Test Acc: 0.5615 | Test F1: 0.5627
Epoch 3/7 | Train Loss: 1.4413 | Train Acc: 0.6671 | Train F1: 0.6494 | Test Loss: 1.3928 | Test Acc: 0.7042 | Test F1: 0.6743
Epoch 4/7 | Train Loss: 1.2563 | Train Acc: 0.7636 | Train F1: 0.7498 | Test Loss: 1.2635 | Test Acc: 0.7500 | Test F1: 0.7284
Epoch 5/7 | Train Loss: 1.1681 | Train Acc: 0.7826 | Train F1: 0.7721 | Test Loss: 1.2050 | Test Acc: 0.7583 | Test F1: 0.7322
Epoch 6/7 | Train Loss: 1.1342 | Train Acc: 0.7908 | Train F1: 0.7753 | Test Loss: 1.2049 | Test Acc: 0.7552 | Test F1: 0.7322
Epoch 7/7 | Train Loss: 1.1415 | Train Acc: 0.7853 | Train F1: 0.7753 | Test Loss: 1.2031 | Test Acc: 0.7646 | Test F1: 0.7322
Train time on cuda: 334.4828544000002
Inference Loss: 1.1380, Accuracy: 78.61%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▅▃▂▁▁▁
test_precision,▁▆▇████
test_recall,▁▅▇████
train_acc,▁▅▇████
train_f1,▁▄▇████

0,1
epoch,7
inference_accuracy,0.78611
inference_loss,1.13799
test_acc,0.76458
test_f1,0.73217
test_loss,1.20312
test_precision,0.73652
test_recall,0.75556
train_acc,0.78533
train_f1,0.77532


wandb: Agent Starting Run: pfxywlkw with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0004192340308168372
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1289 | Train Acc: 0.2011 | Train F1: 0.1798 | Test Loss: 1.9250 | Test Acc: 0.3604 | Test F1: 0.3351
Epoch 2/7 | Train Loss: 1.7551 | Train Acc: 0.5068 | Train F1: 0.4991 | Test Loss: 1.6455 | Test Acc: 0.5698 | Test F1: 0.5696
Epoch 3/7 | Train Loss: 1.5054 | Train Acc: 0.6929 | Train F1: 0.6797 | Test Loss: 1.4519 | Test Acc: 0.7260 | Test F1: 0.7288
Epoch 4/7 | Train Loss: 1.3539 | Train Acc: 0.7677 | Train F1: 0.7596 | Test Loss: 1.3288 | Test Acc: 0.7854 | Test F1: 0.7745
Epoch 5/7 | Train Loss: 1.2731 | Train Acc: 0.7880 | Train F1: 0.7855 | Test Loss: 1.2858 | Test Acc: 0.8146 | Test F1: 0.8018
Epoch 6/7 | Train Loss: 1.2435 | Train Acc: 0.7921 | Train F1: 0.7864 | Test Loss: 1.2941 | Test Acc: 0.8021 | Test F1: 0.8018
Epoch 7/7 | Train Loss: 1.2471 | Train Acc: 0.7908 | Train F1: 0.7864 | Test Loss: 1.2809 | Test Acc: 0.8052 | Test F1: 0.8018
Train time on cuda: 334.2382447
Inference Loss: 1.2400, Accuracy: 79.17%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▇████
test_f1,▁▅▇████
test_loss,█▅▃▂▁▁▁
test_precision,▁▄▇▇███
test_recall,▁▄▇████
train_acc,▁▅▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.79167
inference_loss,1.24001
test_acc,0.80521
test_f1,0.80176
test_loss,1.28087
test_precision,0.8163
test_recall,0.80556
train_acc,0.79076
train_f1,0.7864


wandb: Agent Starting Run: bq184797 with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000463395587945426
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.0517 | Train Acc: 0.2806 | Train F1: 0.2937 | Test Loss: 1.6034 | Test Acc: 0.6198 | Test F1: 0.6155
Epoch 2/7 | Train Loss: 1.3499 | Train Acc: 0.7236 | Train F1: 0.7171 | Test Loss: 1.1127 | Test Acc: 0.8177 | Test F1: 0.8183
Epoch 3/7 | Train Loss: 1.0039 | Train Acc: 0.8167 | Train F1: 0.8142 | Test Loss: 0.8958 | Test Acc: 0.8490 | Test F1: 0.8540
Epoch 4/7 | Train Loss: 0.8419 | Train Acc: 0.8500 | Train F1: 0.8498 | Test Loss: 0.8240 | Test Acc: 0.8438 | Test F1: 0.8654
Epoch 5/7 | Train Loss: 0.7711 | Train Acc: 0.8569 | Train F1: 0.8570 | Test Loss: 0.7300 | Test Acc: 0.8854 | Test F1: 0.8759
Epoch 6/7 | Train Loss: 0.7481 | Train Acc: 0.8667 | Train F1: 0.8666 | Test Loss: 0.7766 | Test Acc: 0.8698 | Test F1: 0.8759
Epoch 7/7 | Train Loss: 0.7481 | Train Acc: 0.8667 | Train F1: 0.8666 | Test Loss: 0.7565 | Test Acc: 0.8854 | Test F1: 0.8759
Train time on cuda: 334.20366809999996
Inference Loss: 0.7481, Accuracy: 86.67%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇▇███
test_f1,▁▆▇████
test_loss,█▄▂▂▁▁▁
test_precision,▁▆▇████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.86667
inference_loss,0.74807
test_acc,0.88542
test_f1,0.8759
test_loss,0.75648
test_precision,0.89381
test_recall,0.87778
train_acc,0.86667
train_f1,0.8666


wandb: Agent Starting Run: kfqk41wm with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0002723654054511924
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.9753 | Train Acc: 0.3500 | Train F1: 0.3466 | Test Loss: 1.7415 | Test Acc: 0.5000 | Test F1: 0.5045
Epoch 2/7 | Train Loss: 1.5320 | Train Acc: 0.6111 | Train F1: 0.5841 | Test Loss: 1.4012 | Test Acc: 0.6510 | Test F1: 0.6181
Epoch 3/7 | Train Loss: 1.2721 | Train Acc: 0.7167 | Train F1: 0.7048 | Test Loss: 1.2121 | Test Acc: 0.7240 | Test F1: 0.7066
Epoch 4/7 | Train Loss: 1.1277 | Train Acc: 0.7597 | Train F1: 0.7524 | Test Loss: 1.0973 | Test Acc: 0.7760 | Test F1: 0.7531
Epoch 5/7 | Train Loss: 1.0538 | Train Acc: 0.7833 | Train F1: 0.7777 | Test Loss: 1.0637 | Test Acc: 0.7917 | Test F1: 0.7650
Epoch 6/7 | Train Loss: 1.0287 | Train Acc: 0.7889 | Train F1: 0.7830 | Test Loss: 1.0813 | Test Acc: 0.7604 | Test F1: 0.7650
Epoch 7/7 | Train Loss: 1.0287 | Train Acc: 0.7889 | Train F1: 0.7830 | Test Loss: 1.0870 | Test Acc: 0.7604 | Test F1: 0.7650
Train time on cuda: 332.8580155999998
Inference Loss: 1.0287, Accuracy: 78.89%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆██▇▇
test_f1,▁▄▆████
test_loss,█▄▃▁▁▁▁
test_precision,▁▄▆████
test_recall,▁▄▇████
train_acc,▁▅▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.78889
inference_loss,1.02872
test_acc,0.76042
test_f1,0.76503
test_loss,1.08701
test_precision,0.77314
test_recall,0.77778
train_acc,0.78889
train_f1,0.78301


wandb: Agent Starting Run: kmy5ytwk with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000860637604213834
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6542 | Train Acc: 0.5083 | Train F1: 0.4992 | Test Loss: 1.0099 | Test Acc: 0.8177 | Test F1: 0.8344
Epoch 2/7 | Train Loss: 0.8169 | Train Acc: 0.8222 | Train F1: 0.8209 | Test Loss: 0.6400 | Test Acc: 0.8854 | Test F1: 0.8861
Epoch 3/7 | Train Loss: 0.5692 | Train Acc: 0.8708 | Train F1: 0.8700 | Test Loss: 0.4801 | Test Acc: 0.9375 | Test F1: 0.9327
Epoch 4/7 | Train Loss: 0.4701 | Train Acc: 0.9000 | Train F1: 0.9006 | Test Loss: 0.4654 | Test Acc: 0.9375 | Test F1: 0.9499
Epoch 5/7 | Train Loss: 0.4311 | Train Acc: 0.9069 | Train F1: 0.9078 | Test Loss: 0.4297 | Test Acc: 0.9167 | Test F1: 0.9281
Epoch 6/7 | Train Loss: 0.4174 | Train Acc: 0.9056 | Train F1: 0.9062 | Test Loss: 0.4117 | Test Acc: 0.9323 | Test F1: 0.9281
Epoch 7/7 | Train Loss: 0.4174 | Train Acc: 0.9056 | Train F1: 0.9062 | Test Loss: 0.4215 | Test Acc: 0.9323 | Test F1: 0.9281
Train time on cuda: 332.951188
Inference Loss: 0.4175, Accuracy: 90.56%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅██▇██
test_f1,▁▄▇█▇▇▇
test_loss,█▄▂▂▁▁▁
test_precision,▁▅▇█▇▇▇
test_recall,▁▄▇█▇▇▇
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.90556
inference_loss,0.41751
test_acc,0.93229
test_f1,0.92809
test_loss,0.42148
test_precision,0.93425
test_recall,0.92778
train_acc,0.90556
train_f1,0.90619


wandb: Agent Starting Run: 5n1z67cv with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006465163116572036
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7699 | Train Acc: 0.4069 | Train F1: 0.3890 | Test Loss: 1.2833 | Test Acc: 0.7396 | Test F1: 0.7516
Epoch 2/7 | Train Loss: 1.0298 | Train Acc: 0.7889 | Train F1: 0.7864 | Test Loss: 0.8480 | Test Acc: 0.8594 | Test F1: 0.8486
Epoch 3/7 | Train Loss: 0.7505 | Train Acc: 0.8403 | Train F1: 0.8384 | Test Loss: 0.6653 | Test Acc: 0.9010 | Test F1: 0.8955
Epoch 4/7 | Train Loss: 0.6282 | Train Acc: 0.8736 | Train F1: 0.8727 | Test Loss: 0.5791 | Test Acc: 0.9062 | Test F1: 0.9031
Epoch 5/7 | Train Loss: 0.5774 | Train Acc: 0.8833 | Train F1: 0.8829 | Test Loss: 0.5855 | Test Acc: 0.9167 | Test F1: 0.9134
Epoch 6/7 | Train Loss: 0.5606 | Train Acc: 0.8861 | Train F1: 0.8860 | Test Loss: 0.5916 | Test Acc: 0.8854 | Test F1: 0.9134
Epoch 7/7 | Train Loss: 0.5606 | Train Acc: 0.8861 | Train F1: 0.8860 | Test Loss: 0.5599 | Test Acc: 0.9167 | Test F1: 0.9134
Train time on cuda: 333.2773465
Inference Loss: 0.5607, Accuracy: 88.75%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇██▇█
test_f1,▁▅▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▇████
test_recall,▁▅▇████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.8875
inference_loss,0.56074
test_acc,0.91667
test_f1,0.91344
test_loss,0.55995
test_precision,0.92553
test_recall,0.91111
train_acc,0.88611
train_f1,0.88602


wandb: Agent Starting Run: e1p4b3xn with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000293863317961713
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.3364 | Train Acc: 0.0679 | Train F1: 0.0583 | Test Loss: 2.1390 | Test Acc: 0.1188 | Test F1: 0.1115
Epoch 2/7 | Train Loss: 2.0235 | Train Acc: 0.2310 | Train F1: 0.2274 | Test Loss: 1.8947 | Test Acc: 0.3521 | Test F1: 0.3289
Epoch 3/7 | Train Loss: 1.8324 | Train Acc: 0.4524 | Train F1: 0.4427 | Test Loss: 1.7388 | Test Acc: 0.5312 | Test F1: 0.5264
Epoch 4/7 | Train Loss: 1.6982 | Train Acc: 0.5734 | Train F1: 0.5675 | Test Loss: 1.6393 | Test Acc: 0.6260 | Test F1: 0.6246
Epoch 5/7 | Train Loss: 1.6243 | Train Acc: 0.6277 | Train F1: 0.6201 | Test Loss: 1.5878 | Test Acc: 0.6677 | Test F1: 0.6517
Epoch 6/7 | Train Loss: 1.5975 | Train Acc: 0.6413 | Train F1: 0.6353 | Test Loss: 1.5927 | Test Acc: 0.6552 | Test F1: 0.6517
Epoch 7/7 | Train Loss: 1.5998 | Train Acc: 0.6413 | Train F1: 0.6353 | Test Loss: 1.5971 | Test Acc: 0.6552 | Test F1: 0.6517
Train time on cuda: 334.3302457
Inference Loss: 1.5960, Accuracy: 64.44%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▆▇███
test_f1,▁▄▆████
test_loss,█▅▃▂▁▁▁
test_precision,▁▄▇████
test_recall,▁▄▆████
train_acc,▁▃▆▇███
train_f1,▁▃▆▇███

0,1
epoch,7
inference_accuracy,0.64444
inference_loss,1.59601
test_acc,0.65521
test_f1,0.6517
test_loss,1.59715
test_precision,0.68855
test_recall,0.65556
train_acc,0.6413
train_f1,0.63531


wandb: Agent Starting Run: 52opl40e with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00039266726209585335
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.4063 | Train Acc: 0.6083 | Train F1: 0.6112 | Test Loss: 0.8230 | Test Acc: 0.8167 | Test F1: 0.8038
Epoch 2/7 | Train Loss: 0.6658 | Train Acc: 0.8431 | Train F1: 0.8377 | Test Loss: 0.5556 | Test Acc: 0.8889 | Test F1: 0.8914
Epoch 3/7 | Train Loss: 0.4980 | Train Acc: 0.8931 | Train F1: 0.8936 | Test Loss: 0.4494 | Test Acc: 0.9444 | Test F1: 0.9456
Epoch 4/7 | Train Loss: 0.4254 | Train Acc: 0.9111 | Train F1: 0.9114 | Test Loss: 0.4021 | Test Acc: 0.9444 | Test F1: 0.9444
Epoch 5/7 | Train Loss: 0.3893 | Train Acc: 0.9222 | Train F1: 0.9226 | Test Loss: 0.3852 | Test Acc: 0.9556 | Test F1: 0.9560
Epoch 6/7 | Train Loss: 0.3755 | Train Acc: 0.9208 | Train F1: 0.9213 | Test Loss: 0.3852 | Test Acc: 0.9556 | Test F1: 0.9560
Epoch 7/7 | Train Loss: 0.3755 | Train Acc: 0.9208 | Train F1: 0.9213 | Test Loss: 0.3852 | Test Acc: 0.9556 | Test F1: 0.9560
Train time on cuda: 329.214856399999
Inference Loss: 0.3755, Accuracy: 92.08%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇▇███
test_f1,▁▅█▇███
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▇▇███
test_recall,▁▅▇▇███
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.92083
inference_loss,0.37554
test_acc,0.95556
test_f1,0.95601
test_loss,0.38524
test_precision,0.96557
test_recall,0.95556
train_acc,0.92083
train_f1,0.9213


wandb: Agent Starting Run: 5tl91tz5 with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0003673758755760435
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.0032 | Train Acc: 0.2583 | Train F1: 0.2828 | Test Loss: 1.6136 | Test Acc: 0.5677 | Test F1: 0.5035
Epoch 2/7 | Train Loss: 1.4381 | Train Acc: 0.6319 | Train F1: 0.6084 | Test Loss: 1.2520 | Test Acc: 0.7344 | Test F1: 0.7260
Epoch 3/7 | Train Loss: 1.1307 | Train Acc: 0.7667 | Train F1: 0.7641 | Test Loss: 1.0319 | Test Acc: 0.8333 | Test F1: 0.8390
Epoch 4/7 | Train Loss: 0.9764 | Train Acc: 0.8097 | Train F1: 0.8092 | Test Loss: 0.9310 | Test Acc: 0.8385 | Test F1: 0.8445
Epoch 5/7 | Train Loss: 0.9010 | Train Acc: 0.8153 | Train F1: 0.8134 | Test Loss: 0.8991 | Test Acc: 0.8594 | Test F1: 0.8666
Epoch 6/7 | Train Loss: 0.8772 | Train Acc: 0.8208 | Train F1: 0.8188 | Test Loss: 0.8857 | Test Acc: 0.8594 | Test F1: 0.8666
Epoch 7/7 | Train Loss: 0.8772 | Train Acc: 0.8208 | Train F1: 0.8188 | Test Loss: 0.8588 | Test Acc: 0.8750 | Test F1: 0.8666
Train time on cuda: 333.7405703000004
Inference Loss: 0.8772, Accuracy: 81.94%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇▇███
test_f1,▁▅▇████
test_loss,█▅▃▂▁▁▁
test_precision,▁▆█████
test_recall,▁▅▇████
train_acc,▁▆▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.81944
inference_loss,0.87721
test_acc,0.875
test_f1,0.86665
test_loss,0.85881
test_precision,0.87242
test_recall,0.86667
train_acc,0.82083
train_f1,0.81882


wandb: Agent Starting Run: opu7rbbc with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0001058760830805093
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.4890 | Train Acc: 0.1087 | Train F1: 0.0587 | Test Loss: 2.3436 | Test Acc: 0.2021 | Test F1: 0.1367
Epoch 2/7 | Train Loss: 2.3533 | Train Acc: 0.1291 | Train F1: 0.0794 | Test Loss: 2.2549 | Test Acc: 0.2208 | Test F1: 0.1562
Epoch 3/7 | Train Loss: 2.2624 | Train Acc: 0.1617 | Train F1: 0.1208 | Test Loss: 2.1752 | Test Acc: 0.2323 | Test F1: 0.1755
Epoch 4/7 | Train Loss: 2.1985 | Train Acc: 0.1766 | Train F1: 0.1392 | Test Loss: 2.1345 | Test Acc: 0.2531 | Test F1: 0.2014
Epoch 5/7 | Train Loss: 2.1620 | Train Acc: 0.1821 | Train F1: 0.1461 | Test Loss: 2.1091 | Test Acc: 0.2687 | Test F1: 0.2258
Epoch 6/7 | Train Loss: 2.1531 | Train Acc: 0.1875 | Train F1: 0.1537 | Test Loss: 2.1168 | Test Acc: 0.2687 | Test F1: 0.2258
Epoch 7/7 | Train Loss: 2.1554 | Train Acc: 0.1848 | Train F1: 0.1537 | Test Loss: 2.1130 | Test Acc: 0.2656 | Test F1: 0.2258
Train time on cuda: 334.4073435999999
Inference Loss: 2.1542, Accuracy: 18.75%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▃▄▆███
test_f1,▁▃▄▆███
test_loss,█▅▃▂▁▁▁
test_precision,▁▂▃▅███
test_recall,▁▃▄▆███
train_acc,▁▃▆▇███
train_f1,▁▃▆▇▇██

0,1
epoch,7
inference_accuracy,0.1875
inference_loss,2.15419
test_acc,0.26562
test_f1,0.22576
test_loss,2.11303
test_precision,0.21622
test_recall,0.26667
train_acc,0.18478
train_f1,0.15371


wandb: Agent Starting Run: t2jdoq8y with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0002937728186888008
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1386 | Train Acc: 0.1793 | Train F1: 0.1765 | Test Loss: 1.9908 | Test Acc: 0.3281 | Test F1: 0.2811
Epoch 2/7 | Train Loss: 1.8658 | Train Acc: 0.3886 | Train F1: 0.3600 | Test Loss: 1.7830 | Test Acc: 0.4469 | Test F1: 0.4172
Epoch 3/7 | Train Loss: 1.6817 | Train Acc: 0.5285 | Train F1: 0.5037 | Test Loss: 1.6457 | Test Acc: 0.5229 | Test F1: 0.5010
Epoch 4/7 | Train Loss: 1.5644 | Train Acc: 0.6019 | Train F1: 0.5867 | Test Loss: 1.5506 | Test Acc: 0.6031 | Test F1: 0.5737
Epoch 5/7 | Train Loss: 1.4946 | Train Acc: 0.6576 | Train F1: 0.6405 | Test Loss: 1.5120 | Test Acc: 0.6146 | Test F1: 0.5999
Epoch 6/7 | Train Loss: 1.4714 | Train Acc: 0.6644 | Train F1: 0.6502 | Test Loss: 1.5132 | Test Acc: 0.6177 | Test F1: 0.5999
Epoch 7/7 | Train Loss: 1.4722 | Train Acc: 0.6671 | Train F1: 0.6502 | Test Loss: 1.5031 | Test Acc: 0.6177 | Test F1: 0.5999
Train time on cuda: 334.0968699999994
Inference Loss: 1.4735, Accuracy: 66.67%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▆████
test_f1,▁▄▆▇███
test_loss,█▅▃▂▁▁▁
test_precision,▁▅▆████
test_recall,▁▄▆▇███
train_acc,▁▄▆▇███
train_f1,▁▄▆▇███

0,1
epoch,7
inference_accuracy,0.66667
inference_loss,1.47348
test_acc,0.61771
test_f1,0.59991
test_loss,1.5031
test_precision,0.63451
test_recall,0.62222
train_acc,0.66712
train_f1,0.65017


wandb: Agent Starting Run: 3a8xakjo with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0005977232235372235
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.4415 | Train Acc: 0.6028 | Train F1: 0.5842 | Test Loss: 0.9034 | Test Acc: 0.8261 | Test F1: 0.8174
Epoch 2/7 | Train Loss: 0.6975 | Train Acc: 0.8347 | Train F1: 0.8334 | Test Loss: 0.5863 | Test Acc: 0.8804 | Test F1: 0.8898
Epoch 3/7 | Train Loss: 0.5051 | Train Acc: 0.8944 | Train F1: 0.8951 | Test Loss: 0.4746 | Test Acc: 0.9022 | Test F1: 0.8995
Epoch 4/7 | Train Loss: 0.4342 | Train Acc: 0.9083 | Train F1: 0.9085 | Test Loss: 0.4295 | Test Acc: 0.9185 | Test F1: 0.9189
Epoch 5/7 | Train Loss: 0.3995 | Train Acc: 0.9181 | Train F1: 0.9185 | Test Loss: 0.4172 | Test Acc: 0.9130 | Test F1: 0.9178
Epoch 6/7 | Train Loss: 0.3877 | Train Acc: 0.9250 | Train F1: 0.9255 | Test Loss: 0.4138 | Test Acc: 0.9185 | Test F1: 0.9178
Epoch 7/7 | Train Loss: 0.3877 | Train Acc: 0.9250 | Train F1: 0.9255 | Test Loss: 0.4142 | Test Acc: 0.9185 | Test F1: 0.9178
Train time on cuda: 331.66616760000034
Inference Loss: 0.3877, Accuracy: 92.50%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▆▇████
test_loss,█▃▂▁▁▁▁
test_precision,▁▅▆████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.925
inference_loss,0.38774
test_acc,0.91848
test_f1,0.91779
test_loss,0.41416
test_precision,0.92811
test_recall,0.91667
train_acc,0.925
train_f1,0.92545


wandb: Agent Starting Run: i0k2vwbu with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0001912647047760462
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1274 | Train Acc: 0.2278 | Train F1: 0.2267 | Test Loss: 1.8281 | Test Acc: 0.4837 | Test F1: 0.4652
Epoch 2/7 | Train Loss: 1.5574 | Train Acc: 0.6486 | Train F1: 0.6389 | Test Loss: 1.3944 | Test Acc: 0.7554 | Test F1: 0.7387
Epoch 3/7 | Train Loss: 1.2428 | Train Acc: 0.7833 | Train F1: 0.7783 | Test Loss: 1.1782 | Test Acc: 0.8043 | Test F1: 0.8059
Epoch 4/7 | Train Loss: 1.0806 | Train Acc: 0.8264 | Train F1: 0.8252 | Test Loss: 1.0605 | Test Acc: 0.8315 | Test F1: 0.8331
Epoch 5/7 | Train Loss: 1.0004 | Train Acc: 0.8347 | Train F1: 0.8348 | Test Loss: 1.0115 | Test Acc: 0.8533 | Test F1: 0.8510
Epoch 6/7 | Train Loss: 0.9749 | Train Acc: 0.8389 | Train F1: 0.8388 | Test Loss: 1.0117 | Test Acc: 0.8533 | Test F1: 0.8510
Epoch 7/7 | Train Loss: 0.9749 | Train Acc: 0.8389 | Train F1: 0.8388 | Test Loss: 1.0100 | Test Acc: 0.8533 | Test F1: 0.8510
Train time on cuda: 331.59511589999966
Inference Loss: 0.9749, Accuracy: 83.75%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇████
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.8375
inference_loss,0.97494
test_acc,0.85326
test_f1,0.85105
test_loss,1.01
test_precision,0.86818
test_recall,0.85
train_acc,0.83889
train_f1,0.83876


wandb: Agent Starting Run: m0rfsqg9 with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0008930973904464679
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.3216 | Train Acc: 0.6042 | Train F1: 0.5966 | Test Loss: 0.6968 | Test Acc: 0.8533 | Test F1: 0.8361
Epoch 2/7 | Train Loss: 0.5199 | Train Acc: 0.8764 | Train F1: 0.8760 | Test Loss: 0.4115 | Test Acc: 0.9185 | Test F1: 0.9163
Epoch 3/7 | Train Loss: 0.3792 | Train Acc: 0.9083 | Train F1: 0.9088 | Test Loss: 0.3418 | Test Acc: 0.9239 | Test F1: 0.9246
Epoch 4/7 | Train Loss: 0.3269 | Train Acc: 0.9292 | Train F1: 0.9297 | Test Loss: 0.3118 | Test Acc: 0.9511 | Test F1: 0.9506
Epoch 5/7 | Train Loss: 0.2977 | Train Acc: 0.9389 | Train F1: 0.9391 | Test Loss: 0.2904 | Test Acc: 0.9674 | Test F1: 0.9675
Epoch 6/7 | Train Loss: 0.2844 | Train Acc: 0.9458 | Train F1: 0.9461 | Test Loss: 0.2918 | Test Acc: 0.9674 | Test F1: 0.9675
Epoch 7/7 | Train Loss: 0.2844 | Train Acc: 0.9458 | Train F1: 0.9461 | Test Loss: 0.2963 | Test Acc: 0.9674 | Test F1: 0.9675
Train time on cuda: 331.78493960000014
Inference Loss: 0.2844, Accuracy: 94.58%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▅▇███
test_f1,▁▅▆▇███
test_loss,█▃▂▁▁▁▁
test_precision,▁▅▄▇███
test_recall,▁▅▅▇███
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.94583
inference_loss,0.28441
test_acc,0.96739
test_f1,0.9675
test_loss,0.29627
test_precision,0.97352
test_recall,0.96667
train_acc,0.94583
train_f1,0.94615


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: 3bqtjjul with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0004132240774410699
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8655 | Train Acc: 0.3875 | Train F1: 0.4041 | Test Loss: 1.4541 | Test Acc: 0.6667 | Test F1: 0.6215
Epoch 2/7 | Train Loss: 1.2947 | Train Acc: 0.7139 | Train F1: 0.6672 | Test Loss: 1.1313 | Test Acc: 0.7552 | Test F1: 0.7363
Epoch 3/7 | Train Loss: 0.9983 | Train Acc: 0.7958 | Train F1: 0.7781 | Test Loss: 0.9328 | Test Acc: 0.8385 | Test F1: 0.8161
Epoch 4/7 | Train Loss: 0.8604 | Train Acc: 0.8347 | Train F1: 0.8261 | Test Loss: 0.8488 | Test Acc: 0.8438 | Test F1: 0.8411
Epoch 5/7 | Train Loss: 0.7941 | Train Acc: 0.8417 | Train F1: 0.8360 | Test Loss: 0.7987 | Test Acc: 0.8646 | Test F1: 0.8486
Epoch 6/7 | Train Loss: 0.7729 | Train Acc: 0.8500 | Train F1: 0.8454 | Test Loss: 0.7698 | Test Acc: 0.8646 | Test F1: 0.8486
Epoch 7/7 | Train Loss: 0.7729 | Train Acc: 0.8500 | Train F1: 0.8454 | Test Loss: 0.7885 | Test Acc: 0.8646 | Test F1: 0.8486
Train time on cuda: 332.86271189999934
Inference Loss: 0.7729, Accuracy: 85.00%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▇▇███
test_f1,▁▅▇████
test_loss,█▅▃▂▁▁▁
test_precision,▁▅▇████
test_recall,▁▄▇████
train_acc,▁▆▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.85
inference_loss,0.7729
test_acc,0.86458
test_f1,0.84863
test_loss,0.78847
test_precision,0.85579
test_recall,0.85556
train_acc,0.85
train_f1,0.84544


wandb: Agent Starting Run: djpvn9qr with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009522938475488112
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.3435 | Train Acc: 0.5847 | Train F1: 0.5848 | Test Loss: 0.6311 | Test Acc: 0.8804 | Test F1: 0.8792
Epoch 2/7 | Train Loss: 0.5433 | Train Acc: 0.8625 | Train F1: 0.8620 | Test Loss: 0.4227 | Test Acc: 0.9130 | Test F1: 0.9131
Epoch 3/7 | Train Loss: 0.3859 | Train Acc: 0.9139 | Train F1: 0.9148 | Test Loss: 0.3399 | Test Acc: 0.9402 | Test F1: 0.9400
Epoch 4/7 | Train Loss: 0.3277 | Train Acc: 0.9333 | Train F1: 0.9338 | Test Loss: 0.3070 | Test Acc: 0.9565 | Test F1: 0.9564
Epoch 5/7 | Train Loss: 0.2981 | Train Acc: 0.9389 | Train F1: 0.9391 | Test Loss: 0.2910 | Test Acc: 0.9728 | Test F1: 0.9731
Epoch 6/7 | Train Loss: 0.2872 | Train Acc: 0.9472 | Train F1: 0.9475 | Test Loss: 0.2921 | Test Acc: 0.9728 | Test F1: 0.9731
Epoch 7/7 | Train Loss: 0.2872 | Train Acc: 0.9472 | Train F1: 0.9475 | Test Loss: 0.2895 | Test Acc: 0.9728 | Test F1: 0.9731
Train time on cuda: 332.16403029999856
Inference Loss: 0.2872, Accuracy: 94.72%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▃▆▇███
test_f1,▁▄▆▇███
test_loss,█▄▂▁▁▁▁
test_precision,▁▄▆▇███
test_recall,▁▃▅▇███
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.94722
inference_loss,0.28724
test_acc,0.97283
test_f1,0.97305
test_loss,0.28948
test_precision,0.97884
test_recall,0.97222
train_acc,0.94722
train_f1,0.94745


wandb: Agent Starting Run: 6olxqmyl with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0004562927422830893
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.4298 | Train Acc: 0.5681 | Train F1: 0.5622 | Test Loss: 0.7765 | Test Acc: 0.8778 | Test F1: 0.8757
Epoch 2/7 | Train Loss: 0.6432 | Train Acc: 0.8611 | Train F1: 0.8602 | Test Loss: 0.5046 | Test Acc: 0.9278 | Test F1: 0.9287
Epoch 3/7 | Train Loss: 0.4683 | Train Acc: 0.8986 | Train F1: 0.8985 | Test Loss: 0.4382 | Test Acc: 0.9167 | Test F1: 0.9168
Epoch 4/7 | Train Loss: 0.4080 | Train Acc: 0.9097 | Train F1: 0.9104 | Test Loss: 0.3763 | Test Acc: 0.9556 | Test F1: 0.9556
Epoch 5/7 | Train Loss: 0.3658 | Train Acc: 0.9194 | Train F1: 0.9197 | Test Loss: 0.3582 | Test Acc: 0.9667 | Test F1: 0.9675
Epoch 6/7 | Train Loss: 0.3525 | Train Acc: 0.9250 | Train F1: 0.9254 | Test Loss: 0.3582 | Test Acc: 0.9667 | Test F1: 0.9675
Epoch 7/7 | Train Loss: 0.3525 | Train Acc: 0.9250 | Train F1: 0.9254 | Test Loss: 0.3582 | Test Acc: 0.9667 | Test F1: 0.9675
Train time on cuda: 328.98290090000046
Inference Loss: 0.3526, Accuracy: 92.50%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▄▇███
test_f1,▁▅▄▇███
test_loss,█▃▂▁▁▁▁
test_precision,▁▅▅▇███
test_recall,▁▅▄▇███
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.925
inference_loss,0.35259
test_acc,0.96667
test_f1,0.96749
test_loss,0.35821
test_precision,0.97351
test_recall,0.96667
train_acc,0.925
train_f1,0.92542


wandb: Agent Starting Run: kxyzvrrl with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006313893518980624
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8283 | Train Acc: 0.4125 | Train F1: 0.4090 | Test Loss: 1.2980 | Test Acc: 0.7240 | Test F1: 0.6817
Epoch 2/7 | Train Loss: 1.0613 | Train Acc: 0.7972 | Train F1: 0.7917 | Test Loss: 0.8508 | Test Acc: 0.8646 | Test F1: 0.8549
Epoch 3/7 | Train Loss: 0.7612 | Train Acc: 0.8583 | Train F1: 0.8572 | Test Loss: 0.6813 | Test Acc: 0.8854 | Test F1: 0.8744
Epoch 4/7 | Train Loss: 0.6356 | Train Acc: 0.8861 | Train F1: 0.8857 | Test Loss: 0.5893 | Test Acc: 0.9167 | Test F1: 0.9101
Epoch 5/7 | Train Loss: 0.5824 | Train Acc: 0.8875 | Train F1: 0.8875 | Test Loss: 0.5718 | Test Acc: 0.9062 | Test F1: 0.8988
Epoch 6/7 | Train Loss: 0.5638 | Train Acc: 0.9014 | Train F1: 0.9016 | Test Loss: 0.5773 | Test Acc: 0.9062 | Test F1: 0.8988
Epoch 7/7 | Train Loss: 0.5638 | Train Acc: 0.9014 | Train F1: 0.9016 | Test Loss: 0.5808 | Test Acc: 0.9062 | Test F1: 0.8988
Train time on cuda: 333.4308962999985
Inference Loss: 0.5639, Accuracy: 90.14%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇████
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▆▇████
train_acc,▁▇▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.90139
inference_loss,0.5639
test_acc,0.90625
test_f1,0.89882
test_loss,0.58076
test_precision,0.91051
test_recall,0.9
train_acc,0.90139
train_f1,0.90156


wandb: Agent Starting Run: g5h54o59 with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006183144043410278
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7395 | Train Acc: 0.4597 | Train F1: 0.4536 | Test Loss: 1.3006 | Test Acc: 0.6771 | Test F1: 0.6436
Epoch 2/7 | Train Loss: 1.0531 | Train Acc: 0.7278 | Train F1: 0.7121 | Test Loss: 0.8711 | Test Acc: 0.8385 | Test F1: 0.8176
Epoch 3/7 | Train Loss: 0.7622 | Train Acc: 0.8500 | Train F1: 0.8492 | Test Loss: 0.7026 | Test Acc: 0.8594 | Test F1: 0.8479
Epoch 4/7 | Train Loss: 0.6351 | Train Acc: 0.8778 | Train F1: 0.8779 | Test Loss: 0.6352 | Test Acc: 0.8490 | Test F1: 0.8663
Epoch 5/7 | Train Loss: 0.5832 | Train Acc: 0.8792 | Train F1: 0.8788 | Test Loss: 0.5993 | Test Acc: 0.8646 | Test F1: 0.8692
Epoch 6/7 | Train Loss: 0.5661 | Train Acc: 0.8847 | Train F1: 0.8849 | Test Loss: 0.5995 | Test Acc: 0.8646 | Test F1: 0.8692
Epoch 7/7 | Train Loss: 0.5661 | Train Acc: 0.8847 | Train F1: 0.8849 | Test Loss: 0.6376 | Test Acc: 0.8490 | Test F1: 0.8692
Train time on cuda: 333.06515769999896
Inference Loss: 0.5662, Accuracy: 88.47%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇█▇██▇
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▆▇████
train_acc,▁▅▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.88472
inference_loss,0.56618
test_acc,0.84896
test_f1,0.86916
test_loss,0.63765
test_precision,0.87936
test_recall,0.87222
train_acc,0.88472
train_f1,0.88487


wandb: Agent Starting Run: lp5u0ifl with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0002671917412030519
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8033 | Train Acc: 0.4000 | Train F1: 0.3927 | Test Loss: 1.4248 | Test Acc: 0.6467 | Test F1: 0.6262
Epoch 2/7 | Train Loss: 1.2104 | Train Acc: 0.7389 | Train F1: 0.7280 | Test Loss: 1.0595 | Test Acc: 0.7935 | Test F1: 0.7879
Epoch 3/7 | Train Loss: 0.9404 | Train Acc: 0.8153 | Train F1: 0.8110 | Test Loss: 0.8864 | Test Acc: 0.8315 | Test F1: 0.8396
Epoch 4/7 | Train Loss: 0.8082 | Train Acc: 0.8556 | Train F1: 0.8537 | Test Loss: 0.7880 | Test Acc: 0.8696 | Test F1: 0.8618
Epoch 5/7 | Train Loss: 0.7447 | Train Acc: 0.8736 | Train F1: 0.8729 | Test Loss: 0.7624 | Test Acc: 0.8750 | Test F1: 0.8744
Epoch 6/7 | Train Loss: 0.7251 | Train Acc: 0.8764 | Train F1: 0.8757 | Test Loss: 0.7655 | Test Acc: 0.8750 | Test F1: 0.8744
Epoch 7/7 | Train Loss: 0.7251 | Train Acc: 0.8764 | Train F1: 0.8757 | Test Loss: 0.7544 | Test Acc: 0.8804 | Test F1: 0.8744
Train time on cuda: 331.7432413000006
Inference Loss: 0.7252, Accuracy: 87.64%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▅▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.87639
inference_loss,0.72516
test_acc,0.88043
test_f1,0.87437
test_loss,0.75442
test_precision,0.8856
test_recall,0.87778
train_acc,0.87639
train_f1,0.87571


wandb: Agent Starting Run: 86bjdskb with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0003806441092449888
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8102 | Train Acc: 0.4375 | Train F1: 0.4391 | Test Loss: 1.4554 | Test Acc: 0.6927 | Test F1: 0.6651
Epoch 2/7 | Train Loss: 1.2689 | Train Acc: 0.7806 | Train F1: 0.7783 | Test Loss: 1.1384 | Test Acc: 0.8177 | Test F1: 0.7995
Epoch 3/7 | Train Loss: 0.9864 | Train Acc: 0.8347 | Train F1: 0.8320 | Test Loss: 0.9390 | Test Acc: 0.8802 | Test F1: 0.8687
Epoch 4/7 | Train Loss: 0.8517 | Train Acc: 0.8514 | Train F1: 0.8492 | Test Loss: 0.8236 | Test Acc: 0.8802 | Test F1: 0.8703
Epoch 5/7 | Train Loss: 0.7868 | Train Acc: 0.8597 | Train F1: 0.8578 | Test Loss: 0.8093 | Test Acc: 0.8906 | Test F1: 0.8818
Epoch 6/7 | Train Loss: 0.7654 | Train Acc: 0.8653 | Train F1: 0.8635 | Test Loss: 0.8008 | Test Acc: 0.8906 | Test F1: 0.8818
Epoch 7/7 | Train Loss: 0.7654 | Train Acc: 0.8653 | Train F1: 0.8635 | Test Loss: 0.7936 | Test Acc: 0.8906 | Test F1: 0.8818
Train time on cuda: 333.7196452999997
Inference Loss: 0.7656, Accuracy: 86.53%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅█████
test_f1,▁▅█████
test_loss,█▅▃▁▁▁▁
test_precision,▁▆█████
test_recall,▁▅█████
train_acc,▁▇█████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.86528
inference_loss,0.76555
test_acc,0.89062
test_f1,0.88177
test_loss,0.79361
test_precision,0.89608
test_recall,0.88333
train_acc,0.86528
train_f1,0.86348


wandb: Agent Starting Run: mde64h4x with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007553294626151508
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6814 | Train Acc: 0.5139 | Train F1: 0.5077 | Test Loss: 1.1396 | Test Acc: 0.8021 | Test F1: 0.8037
Epoch 2/7 | Train Loss: 0.8722 | Train Acc: 0.8319 | Train F1: 0.8293 | Test Loss: 0.6983 | Test Acc: 0.8698 | Test F1: 0.8611
Epoch 3/7 | Train Loss: 0.6170 | Train Acc: 0.8875 | Train F1: 0.8880 | Test Loss: 0.5718 | Test Acc: 0.9167 | Test F1: 0.9086
Epoch 4/7 | Train Loss: 0.5156 | Train Acc: 0.9000 | Train F1: 0.9009 | Test Loss: 0.4859 | Test Acc: 0.9375 | Test F1: 0.9337
Epoch 5/7 | Train Loss: 0.4723 | Train Acc: 0.9069 | Train F1: 0.9075 | Test Loss: 0.4771 | Test Acc: 0.9167 | Test F1: 0.9282
Epoch 6/7 | Train Loss: 0.4583 | Train Acc: 0.9083 | Train F1: 0.9091 | Test Loss: 0.4752 | Test Acc: 0.9167 | Test F1: 0.9282
Epoch 7/7 | Train Loss: 0.4583 | Train Acc: 0.9083 | Train F1: 0.9091 | Test Loss: 0.4772 | Test Acc: 0.9167 | Test F1: 0.9282
Train time on cuda: 333.2308519999988
Inference Loss: 0.4584, Accuracy: 90.83%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇█▇▇▇
test_f1,▁▄▇████
test_loss,█▃▂▁▁▁▁
test_precision,▁▄▇████
test_recall,▁▄▇████
train_acc,▁▇█████
train_f1,▁▇█████

0,1
epoch,7
inference_accuracy,0.90833
inference_loss,0.45839
test_acc,0.91667
test_f1,0.92817
test_loss,0.4772
test_precision,0.93495
test_recall,0.92778
train_acc,0.90833
train_f1,0.90908


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: 4z1gutb4 with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006291835205152368
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6174 | Train Acc: 0.4889 | Train F1: 0.4847 | Test Loss: 0.9361 | Test Acc: 0.7717 | Test F1: 0.7538
Epoch 2/7 | Train Loss: 0.7213 | Train Acc: 0.8500 | Train F1: 0.8464 | Test Loss: 0.5737 | Test Acc: 0.8804 | Test F1: 0.8759
Epoch 3/7 | Train Loss: 0.5154 | Train Acc: 0.8764 | Train F1: 0.8744 | Test Loss: 0.4670 | Test Acc: 0.9022 | Test F1: 0.8999
Epoch 4/7 | Train Loss: 0.4391 | Train Acc: 0.9083 | Train F1: 0.9085 | Test Loss: 0.4283 | Test Acc: 0.9076 | Test F1: 0.9017
Epoch 5/7 | Train Loss: 0.4032 | Train Acc: 0.9222 | Train F1: 0.9229 | Test Loss: 0.4040 | Test Acc: 0.9293 | Test F1: 0.9285
Epoch 6/7 | Train Loss: 0.3880 | Train Acc: 0.9278 | Train F1: 0.9284 | Test Loss: 0.4028 | Test Acc: 0.9239 | Test F1: 0.9285
Epoch 7/7 | Train Loss: 0.3880 | Train Acc: 0.9278 | Train F1: 0.9284 | Test Loss: 0.4134 | Test Acc: 0.9239 | Test F1: 0.9285
Train time on cuda: 331.3777563000003
Inference Loss: 0.3880, Accuracy: 92.78%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇▇███
test_f1,▁▆▇▇███
test_loss,█▃▂▁▁▁▁
test_precision,▁▆▇▇███
test_recall,▁▆▇▇███
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.92778
inference_loss,0.38804
test_acc,0.92391
test_f1,0.92849
test_loss,0.41343
test_precision,0.93513
test_recall,0.92778
train_acc,0.92778
train_f1,0.92844


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: wikab3c1 with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00019890562834047712
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.4093 | Train Acc: 0.1223 | Train F1: 0.0750 | Test Loss: 2.2626 | Test Acc: 0.1708 | Test F1: 0.1220
Epoch 2/7 | Train Loss: 2.1302 | Train Acc: 0.1970 | Train F1: 0.1473 | Test Loss: 2.0638 | Test Acc: 0.2427 | Test F1: 0.2025
Epoch 3/7 | Train Loss: 1.9672 | Train Acc: 0.2609 | Train F1: 0.2339 | Test Loss: 1.9557 | Test Acc: 0.2854 | Test F1: 0.2635
Epoch 4/7 | Train Loss: 1.8717 | Train Acc: 0.3397 | Train F1: 0.3205 | Test Loss: 1.8786 | Test Acc: 0.3542 | Test F1: 0.3216
Epoch 5/7 | Train Loss: 1.8224 | Train Acc: 0.3927 | Train F1: 0.3792 | Test Loss: 1.8470 | Test Acc: 0.3656 | Test F1: 0.3414
Epoch 6/7 | Train Loss: 1.8030 | Train Acc: 0.4022 | Train F1: 0.3923 | Test Loss: 1.8393 | Test Acc: 0.3687 | Test F1: 0.3414
Epoch 7/7 | Train Loss: 1.8035 | Train Acc: 0.4062 | Train F1: 0.3923 | Test Loss: 1.8518 | Test Acc: 0.3687 | Test F1: 0.3414
Train time on cuda: 334.1234492000003
Inference Loss: 1.8022, Accuracy: 40.56%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▅▇███
test_f1,▁▄▆▇███
test_loss,█▅▃▂▁▁▁
test_precision,▁▅▆▇███
test_recall,▁▃▅▇███
train_acc,▁▃▄▆███
train_f1,▁▃▅▆███

0,1
epoch,7
inference_accuracy,0.40556
inference_loss,1.8022
test_acc,0.36875
test_f1,0.34136
test_loss,1.85184
test_precision,0.35659
test_recall,0.36667
train_acc,0.40625
train_f1,0.39233


wandb: Agent Starting Run: o4ol8mm3 with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00013743330568609407
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.3559 | Train Acc: 0.1514 | Train F1: 0.0812 | Test Loss: 2.1311 | Test Acc: 0.2240 | Test F1: 0.1694
Epoch 2/7 | Train Loss: 2.0340 | Train Acc: 0.3069 | Train F1: 0.2788 | Test Loss: 1.9530 | Test Acc: 0.3490 | Test F1: 0.3203
Epoch 3/7 | Train Loss: 1.8541 | Train Acc: 0.4444 | Train F1: 0.4089 | Test Loss: 1.8096 | Test Acc: 0.4740 | Test F1: 0.4450
Epoch 4/7 | Train Loss: 1.7352 | Train Acc: 0.5208 | Train F1: 0.4869 | Test Loss: 1.7103 | Test Acc: 0.5156 | Test F1: 0.4888
Epoch 5/7 | Train Loss: 1.6680 | Train Acc: 0.5514 | Train F1: 0.5209 | Test Loss: 1.6454 | Test Acc: 0.5677 | Test F1: 0.5080
Epoch 6/7 | Train Loss: 1.6446 | Train Acc: 0.5625 | Train F1: 0.5334 | Test Loss: 1.6927 | Test Acc: 0.5208 | Test F1: 0.5080
Epoch 7/7 | Train Loss: 1.6446 | Train Acc: 0.5625 | Train F1: 0.5334 | Test Loss: 1.6880 | Test Acc: 0.5208 | Test F1: 0.5080
Train time on cuda: 333.1852203000017
Inference Loss: 1.6446, Accuracy: 56.39%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▆▇█▇▇
test_f1,▁▄▇████
test_loss,█▅▃▂▁▂▂
test_precision,▁▄▇████
test_recall,▁▄▇████
train_acc,▁▄▆▇███
train_f1,▁▄▆▇███

0,1
epoch,7
inference_accuracy,0.56389
inference_loss,1.64465
test_acc,0.52083
test_f1,0.50798
test_loss,1.688
test_precision,0.54417
test_recall,0.53889
train_acc,0.5625
train_f1,0.5334


wandb: Agent Starting Run: lnkrq3dn with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0001618562366655912
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.9610 | Train Acc: 0.3000 | Train F1: 0.2930 | Test Loss: 1.5426 | Test Acc: 0.5667 | Test F1: 0.5374
Epoch 2/7 | Train Loss: 1.3120 | Train Acc: 0.6972 | Train F1: 0.6800 | Test Loss: 1.1299 | Test Acc: 0.8111 | Test F1: 0.8062
Epoch 3/7 | Train Loss: 1.0179 | Train Acc: 0.8000 | Train F1: 0.7938 | Test Loss: 0.9403 | Test Acc: 0.8500 | Test F1: 0.8454
Epoch 4/7 | Train Loss: 0.8767 | Train Acc: 0.8417 | Train F1: 0.8389 | Test Loss: 0.8448 | Test Acc: 0.8722 | Test F1: 0.8712
Epoch 5/7 | Train Loss: 0.8076 | Train Acc: 0.8486 | Train F1: 0.8474 | Test Loss: 0.8045 | Test Acc: 0.8722 | Test F1: 0.8712
Epoch 6/7 | Train Loss: 0.7843 | Train Acc: 0.8708 | Train F1: 0.8706 | Test Loss: 0.8045 | Test Acc: 0.8722 | Test F1: 0.8712
Epoch 7/7 | Train Loss: 0.7843 | Train Acc: 0.8708 | Train F1: 0.8706 | Test Loss: 0.8045 | Test Acc: 0.8722 | Test F1: 0.8712
Train time on cuda: 329.1123411000008
Inference Loss: 0.7844, Accuracy: 87.08%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇▇████
test_f1,▁▇▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▇▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.87083
inference_loss,0.78439
test_acc,0.87222
test_f1,0.8712
test_loss,0.80445
test_precision,0.88153
test_recall,0.87222
train_acc,0.87083
train_f1,0.87057


wandb: Agent Starting Run: eshetp9f with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0003062883089211161
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7294 | Train Acc: 0.4764 | Train F1: 0.4642 | Test Loss: 1.3179 | Test Acc: 0.6630 | Test F1: 0.6604
Epoch 2/7 | Train Loss: 1.1040 | Train Acc: 0.7819 | Train F1: 0.7754 | Test Loss: 0.9307 | Test Acc: 0.8152 | Test F1: 0.8157
Epoch 3/7 | Train Loss: 0.8386 | Train Acc: 0.8375 | Train F1: 0.8348 | Test Loss: 0.7660 | Test Acc: 0.8859 | Test F1: 0.8808
Epoch 4/7 | Train Loss: 0.7235 | Train Acc: 0.8597 | Train F1: 0.8593 | Test Loss: 0.7007 | Test Acc: 0.8913 | Test F1: 0.8921
Epoch 5/7 | Train Loss: 0.6664 | Train Acc: 0.8639 | Train F1: 0.8634 | Test Loss: 0.6642 | Test Acc: 0.9076 | Test F1: 0.9087
Epoch 6/7 | Train Loss: 0.6475 | Train Acc: 0.8708 | Train F1: 0.8706 | Test Loss: 0.6625 | Test Acc: 0.9130 | Test F1: 0.9087
Epoch 7/7 | Train Loss: 0.6475 | Train Acc: 0.8708 | Train F1: 0.8706 | Test Loss: 0.6534 | Test Acc: 0.9130 | Test F1: 0.9087
Train time on cuda: 332.1473526000009
Inference Loss: 0.6476, Accuracy: 87.22%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇▇███
test_f1,▁▅▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▇▇███
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.87222
inference_loss,0.64759
test_acc,0.91304
test_f1,0.90872
test_loss,0.65343
test_precision,0.92305
test_recall,0.91111
train_acc,0.87083
train_f1,0.87063


wandb: Agent Starting Run: bd70bq14 with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006016874953397823
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8331 | Train Acc: 0.3889 | Train F1: 0.3776 | Test Loss: 1.2814 | Test Acc: 0.7500 | Test F1: 0.7203
Epoch 2/7 | Train Loss: 1.0772 | Train Acc: 0.7750 | Train F1: 0.7636 | Test Loss: 0.8743 | Test Acc: 0.8490 | Test F1: 0.8261
Epoch 3/7 | Train Loss: 0.7893 | Train Acc: 0.8333 | Train F1: 0.8289 | Test Loss: 0.6917 | Test Acc: 0.8698 | Test F1: 0.8731
Epoch 4/7 | Train Loss: 0.6622 | Train Acc: 0.8597 | Train F1: 0.8582 | Test Loss: 0.6301 | Test Acc: 0.8750 | Test F1: 0.8790
Epoch 5/7 | Train Loss: 0.6075 | Train Acc: 0.8778 | Train F1: 0.8770 | Test Loss: 0.5848 | Test Acc: 0.9062 | Test F1: 0.8983
Epoch 6/7 | Train Loss: 0.5897 | Train Acc: 0.8792 | Train F1: 0.8784 | Test Loss: 0.5833 | Test Acc: 0.9062 | Test F1: 0.8983
Epoch 7/7 | Train Loss: 0.5897 | Train Acc: 0.8792 | Train F1: 0.8784 | Test Loss: 0.5760 | Test Acc: 0.9062 | Test F1: 0.8983
Train time on cuda: 333.40544469999986
Inference Loss: 0.5897, Accuracy: 87.92%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆▇███
test_f1,▁▅▇▇███
test_loss,█▄▂▂▁▁▁
test_precision,▁▆▇▇███
test_recall,▁▅▇▇███
train_acc,▁▇▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.87917
inference_loss,0.58973
test_acc,0.90625
test_f1,0.89827
test_loss,0.576
test_precision,0.90651
test_recall,0.9
train_acc,0.87917
train_f1,0.87844


wandb: Agent Starting Run: j6ia2mco with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0002373745664238162
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7434 | Train Acc: 0.4292 | Train F1: 0.4388 | Test Loss: 1.2263 | Test Acc: 0.7389 | Test F1: 0.7032
Epoch 2/7 | Train Loss: 1.0083 | Train Acc: 0.7986 | Train F1: 0.7892 | Test Loss: 0.8294 | Test Acc: 0.8667 | Test F1: 0.8666
Epoch 3/7 | Train Loss: 0.7489 | Train Acc: 0.8514 | Train F1: 0.8508 | Test Loss: 0.6797 | Test Acc: 0.8889 | Test F1: 0.8872
Epoch 4/7 | Train Loss: 0.6390 | Train Acc: 0.8750 | Train F1: 0.8744 | Test Loss: 0.6062 | Test Acc: 0.8944 | Test F1: 0.8932
Epoch 5/7 | Train Loss: 0.5874 | Train Acc: 0.8903 | Train F1: 0.8905 | Test Loss: 0.5761 | Test Acc: 0.9000 | Test F1: 0.8993
Epoch 6/7 | Train Loss: 0.5686 | Train Acc: 0.8958 | Train F1: 0.8960 | Test Loss: 0.5761 | Test Acc: 0.9000 | Test F1: 0.8993
Epoch 7/7 | Train Loss: 0.5686 | Train Acc: 0.8958 | Train F1: 0.8960 | Test Loss: 0.5761 | Test Acc: 0.9000 | Test F1: 0.8993
Train time on cuda: 329.05971070000123
Inference Loss: 0.5686, Accuracy: 89.58%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇█████
test_f1,▁▇█████
test_loss,█▄▂▁▁▁▁
test_precision,▁▇█████
test_recall,▁▇█████
train_acc,▁▇▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.89583
inference_loss,0.56859
test_acc,0.9
test_f1,0.89925
test_loss,0.57605
test_precision,0.90607
test_recall,0.9
train_acc,0.89583
train_f1,0.89596


wandb: Agent Starting Run: vfkcub62 with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00040446516639148177
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1350 | Train Acc: 0.1753 | Train F1: 0.1844 | Test Loss: 1.9185 | Test Acc: 0.3687 | Test F1: 0.3422
Epoch 2/7 | Train Loss: 1.7593 | Train Acc: 0.4742 | Train F1: 0.4399 | Test Loss: 1.6619 | Test Acc: 0.5042 | Test F1: 0.4374
Epoch 3/7 | Train Loss: 1.5206 | Train Acc: 0.6236 | Train F1: 0.5970 | Test Loss: 1.4831 | Test Acc: 0.5958 | Test F1: 0.5643
Epoch 4/7 | Train Loss: 1.3718 | Train Acc: 0.7188 | Train F1: 0.7048 | Test Loss: 1.3614 | Test Acc: 0.7042 | Test F1: 0.6894
Epoch 5/7 | Train Loss: 1.2897 | Train Acc: 0.7704 | Train F1: 0.7680 | Test Loss: 1.3028 | Test Acc: 0.7375 | Test F1: 0.7135
Epoch 6/7 | Train Loss: 1.2598 | Train Acc: 0.7840 | Train F1: 0.7798 | Test Loss: 1.3170 | Test Acc: 0.7219 | Test F1: 0.7135
Epoch 7/7 | Train Loss: 1.2550 | Train Acc: 0.7880 | Train F1: 0.7798 | Test Loss: 1.3140 | Test Acc: 0.7219 | Test F1: 0.7135
Train time on cuda: 334.9865578000008
Inference Loss: 1.2609, Accuracy: 78.61%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▅▇███
test_f1,▁▃▅████
test_loss,█▅▃▂▁▁▁
test_precision,▁▄▅▇███
test_recall,▁▃▆████
train_acc,▁▄▆▇███
train_f1,▁▄▆▇███

0,1
epoch,7
inference_accuracy,0.78611
inference_loss,1.26091
test_acc,0.72187
test_f1,0.71347
test_loss,1.31403
test_precision,0.74752
test_recall,0.73333
train_acc,0.78804
train_f1,0.77976


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: tr49u8ml with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0008650703645446204
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.5372 | Train Acc: 0.5750 | Train F1: 0.5749 | Test Loss: 1.0020 | Test Acc: 0.7812 | Test F1: 0.7758
Epoch 2/7 | Train Loss: 0.7717 | Train Acc: 0.8292 | Train F1: 0.8255 | Test Loss: 0.6409 | Test Acc: 0.8542 | Test F1: 0.8537
Epoch 3/7 | Train Loss: 0.5449 | Train Acc: 0.8833 | Train F1: 0.8839 | Test Loss: 0.5090 | Test Acc: 0.8854 | Test F1: 0.8971
Epoch 4/7 | Train Loss: 0.4580 | Train Acc: 0.9042 | Train F1: 0.9043 | Test Loss: 0.4678 | Test Acc: 0.8854 | Test F1: 0.8948
Epoch 5/7 | Train Loss: 0.4202 | Train Acc: 0.9111 | Train F1: 0.9112 | Test Loss: 0.4244 | Test Acc: 0.9167 | Test F1: 0.9116
Epoch 6/7 | Train Loss: 0.4078 | Train Acc: 0.9153 | Train F1: 0.9152 | Test Loss: 0.4183 | Test Acc: 0.9167 | Test F1: 0.9116
Epoch 7/7 | Train Loss: 0.4078 | Train Acc: 0.9153 | Train F1: 0.9152 | Test Loss: 0.4106 | Test Acc: 0.9167 | Test F1: 0.9116
Train time on cuda: 333.0185268000005
Inference Loss: 0.4079, Accuracy: 91.53%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆▆███
test_f1,▁▅▇▇███
test_loss,█▄▂▂▁▁▁
test_precision,▁▅▇▇███
test_recall,▁▅▇▇███
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.91528
inference_loss,0.40791
test_acc,0.91667
test_f1,0.91158
test_loss,0.41061
test_precision,0.92511
test_recall,0.91111
train_acc,0.91528
train_f1,0.91522


wandb: Agent Starting Run: fdo49k35 with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006454222272681722
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1602 | Train Acc: 0.2160 | Train F1: 0.2070 | Test Loss: 1.8276 | Test Acc: 0.4594 | Test F1: 0.3999
Epoch 2/7 | Train Loss: 1.6141 | Train Acc: 0.5856 | Train F1: 0.5335 | Test Loss: 1.4446 | Test Acc: 0.6750 | Test F1: 0.6377
Epoch 3/7 | Train Loss: 1.2807 | Train Acc: 0.7378 | Train F1: 0.7088 | Test Loss: 1.1863 | Test Acc: 0.7625 | Test F1: 0.7432
Epoch 4/7 | Train Loss: 1.1035 | Train Acc: 0.7908 | Train F1: 0.7792 | Test Loss: 1.0526 | Test Acc: 0.8354 | Test F1: 0.8379
Epoch 5/7 | Train Loss: 1.0104 | Train Acc: 0.8207 | Train F1: 0.8166 | Test Loss: 0.9934 | Test Acc: 0.8635 | Test F1: 0.8570
Epoch 6/7 | Train Loss: 0.9829 | Train Acc: 0.8329 | Train F1: 0.8310 | Test Loss: 1.0041 | Test Acc: 0.8510 | Test F1: 0.8570
Epoch 7/7 | Train Loss: 0.9889 | Train Acc: 0.8302 | Train F1: 0.8310 | Test Loss: 0.9896 | Test Acc: 0.8698 | Test F1: 0.8570
Train time on cuda: 334.50812640000004
Inference Loss: 0.9863, Accuracy: 83.19%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆▇███
test_f1,▁▅▆████
test_loss,█▅▃▂▁▁▁
test_precision,▁▄▇████
test_recall,▁▅▆████
train_acc,▁▅▇████
train_f1,▁▅▇▇███

0,1
epoch,7
inference_accuracy,0.83194
inference_loss,0.98632
test_acc,0.86979
test_f1,0.85699
test_loss,0.98962
test_precision,0.867
test_recall,0.86111
train_acc,0.83016
train_f1,0.83098


wandb: Agent Starting Run: yxvlyxqb with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0004058852678129564
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1437 | Train Acc: 0.2527 | Train F1: 0.1942 | Test Loss: 1.9007 | Test Acc: 0.3302 | Test F1: 0.2807
Epoch 2/7 | Train Loss: 1.7526 | Train Acc: 0.4402 | Train F1: 0.3956 | Test Loss: 1.6454 | Test Acc: 0.5052 | Test F1: 0.4681
Epoch 3/7 | Train Loss: 1.5254 | Train Acc: 0.6019 | Train F1: 0.5770 | Test Loss: 1.4582 | Test Acc: 0.6813 | Test F1: 0.6635
Epoch 4/7 | Train Loss: 1.3800 | Train Acc: 0.7052 | Train F1: 0.6963 | Test Loss: 1.3355 | Test Acc: 0.7615 | Test F1: 0.7463
Epoch 5/7 | Train Loss: 1.2938 | Train Acc: 0.7582 | Train F1: 0.7514 | Test Loss: 1.2831 | Test Acc: 0.7740 | Test F1: 0.7694
Epoch 6/7 | Train Loss: 1.2685 | Train Acc: 0.7799 | Train F1: 0.7752 | Test Loss: 1.2892 | Test Acc: 0.7740 | Test F1: 0.7694
Epoch 7/7 | Train Loss: 1.2653 | Train Acc: 0.7785 | Train F1: 0.7752 | Test Loss: 1.2733 | Test Acc: 0.7833 | Test F1: 0.7694
Train time on cuda: 335.7745576000016
Inference Loss: 1.2707, Accuracy: 77.92%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▆████
test_f1,▁▄▆████
test_loss,█▅▃▂▁▁▁
test_precision,▁▄▇████
test_recall,▁▄▇████
train_acc,▁▃▆▇███
train_f1,▁▃▆▇███

0,1
epoch,7
inference_accuracy,0.77917
inference_loss,1.27065
test_acc,0.78333
test_f1,0.76944
test_loss,1.27331
test_precision,0.80627
test_recall,0.77222
train_acc,0.77853
train_f1,0.77516


wandb: Agent Starting Run: 2d6dnwnx with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00026733751352771377
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8040 | Train Acc: 0.4458 | Train F1: 0.4201 | Test Loss: 1.4111 | Test Acc: 0.7011 | Test F1: 0.6836
Epoch 2/7 | Train Loss: 1.2103 | Train Acc: 0.7611 | Train F1: 0.7533 | Test Loss: 1.0255 | Test Acc: 0.8370 | Test F1: 0.8355
Epoch 3/7 | Train Loss: 0.9331 | Train Acc: 0.8181 | Train F1: 0.8135 | Test Loss: 0.8395 | Test Acc: 0.8967 | Test F1: 0.8917
Epoch 4/7 | Train Loss: 0.8044 | Train Acc: 0.8653 | Train F1: 0.8632 | Test Loss: 0.7620 | Test Acc: 0.8913 | Test F1: 0.8926
Epoch 5/7 | Train Loss: 0.7426 | Train Acc: 0.8861 | Train F1: 0.8845 | Test Loss: 0.7185 | Test Acc: 0.9076 | Test F1: 0.9039
Epoch 6/7 | Train Loss: 0.7229 | Train Acc: 0.8847 | Train F1: 0.8834 | Test Loss: 0.7240 | Test Acc: 0.9076 | Test F1: 0.9039
Epoch 7/7 | Train Loss: 0.7229 | Train Acc: 0.8847 | Train F1: 0.8834 | Test Loss: 0.7193 | Test Acc: 0.9076 | Test F1: 0.9039
Train time on cuda: 331.48567980000007
Inference Loss: 0.7230, Accuracy: 88.47%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆█▇███
test_f1,▁▆█████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆█████
test_recall,▁▆█████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.88472
inference_loss,0.72297
test_acc,0.90761
test_f1,0.90394
test_loss,0.71933
test_precision,0.91496
test_recall,0.90556
train_acc,0.88472
train_f1,0.88343


wandb: Agent Starting Run: xmfzizcs with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009592761216100468
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8309 | Train Acc: 0.4130 | Train F1: 0.3970 | Test Loss: 1.4615 | Test Acc: 0.5760 | Test F1: 0.5558
Epoch 2/7 | Train Loss: 1.2097 | Train Acc: 0.7255 | Train F1: 0.7150 | Test Loss: 1.0401 | Test Acc: 0.8115 | Test F1: 0.8003
Epoch 3/7 | Train Loss: 0.8919 | Train Acc: 0.8179 | Train F1: 0.8134 | Test Loss: 0.8287 | Test Acc: 0.8500 | Test F1: 0.8447
Epoch 4/7 | Train Loss: 0.7427 | Train Acc: 0.8519 | Train F1: 0.8475 | Test Loss: 0.7473 | Test Acc: 0.8760 | Test F1: 0.8760
Epoch 5/7 | Train Loss: 0.6712 | Train Acc: 0.8655 | Train F1: 0.8612 | Test Loss: 0.7023 | Test Acc: 0.8781 | Test F1: 0.8817
Epoch 6/7 | Train Loss: 0.6524 | Train Acc: 0.8641 | Train F1: 0.8630 | Test Loss: 0.7027 | Test Acc: 0.8844 | Test F1: 0.8817
Epoch 7/7 | Train Loss: 0.6520 | Train Acc: 0.8655 | Train F1: 0.8630 | Test Loss: 0.6984 | Test Acc: 0.8844 | Test F1: 0.8817
Train time on cuda: 333.62744659999953
Inference Loss: 0.6535, Accuracy: 86.39%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇████
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.86389
inference_loss,0.65353
test_acc,0.88438
test_f1,0.88173
test_loss,0.6984
test_precision,0.89755
test_recall,0.88333
train_acc,0.86549
train_f1,0.863


wandb: Agent Starting Run: yf6mg99a with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0004215994079033697
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.9894 | Train Acc: 0.3247 | Train F1: 0.3163 | Test Loss: 1.7973 | Test Acc: 0.4604 | Test F1: 0.4491
Epoch 2/7 | Train Loss: 1.6356 | Train Acc: 0.5788 | Train F1: 0.5600 | Test Loss: 1.5356 | Test Acc: 0.6177 | Test F1: 0.6186
Epoch 3/7 | Train Loss: 1.4052 | Train Acc: 0.7188 | Train F1: 0.7106 | Test Loss: 1.3548 | Test Acc: 0.7406 | Test F1: 0.7473
Epoch 4/7 | Train Loss: 1.2595 | Train Acc: 0.7826 | Train F1: 0.7755 | Test Loss: 1.2491 | Test Acc: 0.7948 | Test F1: 0.7905
Epoch 5/7 | Train Loss: 1.1821 | Train Acc: 0.8084 | Train F1: 0.8044 | Test Loss: 1.1929 | Test Acc: 0.8063 | Test F1: 0.7960
Epoch 6/7 | Train Loss: 1.1639 | Train Acc: 0.8152 | Train F1: 0.8164 | Test Loss: 1.1965 | Test Acc: 0.8063 | Test F1: 0.7960
Epoch 7/7 | Train Loss: 1.1611 | Train Acc: 0.8207 | Train F1: 0.8164 | Test Loss: 1.2149 | Test Acc: 0.7844 | Test F1: 0.7960
Train time on cuda: 339.4405071000001
Inference Loss: 1.1610, Accuracy: 81.81%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▇████
test_f1,▁▄▇████
test_loss,█▅▃▂▁▁▁
test_precision,▁▅▇████
test_recall,▁▄▇████
train_acc,▁▅▇▇███
train_f1,▁▄▇▇███

0,1
epoch,7
inference_accuracy,0.81806
inference_loss,1.16104
test_acc,0.78437
test_f1,0.79602
test_loss,1.21492
test_precision,0.79924
test_recall,0.8
train_acc,0.82065
train_f1,0.81639


wandb: Agent Starting Run: 1ibop3xu with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00012118202481831978
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1135 | Train Acc: 0.2722 | Train F1: 0.2408 | Test Loss: 1.7237 | Test Acc: 0.4556 | Test F1: 0.4030
Epoch 2/7 | Train Loss: 1.5481 | Train Acc: 0.5806 | Train F1: 0.5566 | Test Loss: 1.3505 | Test Acc: 0.7278 | Test F1: 0.7212
Epoch 3/7 | Train Loss: 1.2538 | Train Acc: 0.7069 | Train F1: 0.6896 | Test Loss: 1.1461 | Test Acc: 0.8167 | Test F1: 0.8183
Epoch 4/7 | Train Loss: 1.0973 | Train Acc: 0.7569 | Train F1: 0.7449 | Test Loss: 1.0398 | Test Acc: 0.8333 | Test F1: 0.8318
Epoch 5/7 | Train Loss: 1.0158 | Train Acc: 0.7944 | Train F1: 0.7865 | Test Loss: 0.9903 | Test Acc: 0.8444 | Test F1: 0.8444
Epoch 6/7 | Train Loss: 0.9895 | Train Acc: 0.8056 | Train F1: 0.8001 | Test Loss: 0.9903 | Test Acc: 0.8444 | Test F1: 0.8444
Epoch 7/7 | Train Loss: 0.9895 | Train Acc: 0.8056 | Train F1: 0.8001 | Test Loss: 0.9903 | Test Acc: 0.8444 | Test F1: 0.8444
Train time on cuda: 337.94926750000013
Inference Loss: 0.9895, Accuracy: 80.56%


VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇████
test_f1,▁▆█████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆█████
test_recall,▁▆█████
train_acc,▁▅▇▇███
train_f1,▁▅▇▇███

0,1
epoch,7
inference_accuracy,0.80556
inference_loss,0.9895
test_acc,0.84444
test_f1,0.84435
test_loss,0.99028
test_precision,0.86643
test_recall,0.84444
train_acc,0.80556
train_f1,0.80014


wandb: Agent Starting Run: woortx1s with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000979720296279491
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.1598 | Train Acc: 0.6694 | Train F1: 0.6602 | Test Loss: 0.5950 | Test Acc: 0.8696 | Test F1: 0.8667
Epoch 2/7 | Train Loss: 0.4781 | Train Acc: 0.8819 | Train F1: 0.8818 | Test Loss: 0.3727 | Test Acc: 0.9348 | Test F1: 0.9338
Epoch 3/7 | Train Loss: 0.3470 | Train Acc: 0.9250 | Train F1: 0.9251 | Test Loss: 0.3526 | Test Acc: 0.9293 | Test F1: 0.9312
Epoch 4/7 | Train Loss: 0.3036 | Train Acc: 0.9333 | Train F1: 0.9340 | Test Loss: 0.2827 | Test Acc: 0.9565 | Test F1: 0.9555
Epoch 5/7 | Train Loss: 0.2659 | Train Acc: 0.9458 | Train F1: 0.9463 | Test Loss: 0.2784 | Test Acc: 0.9674 | Test F1: 0.9670
Epoch 6/7 | Train Loss: 0.2549 | Train Acc: 0.9486 | Train F1: 0.9490 | Test Loss: 0.2791 | Test Acc: 0.9620 | Test F1: 0.9670
Epoch 7/7 | Train Loss: 0.2549 | Train Acc: 0.9486 | Train F1: 0.9490 | Test Loss: 0.2725 | Test Acc: 0.9674 | Test F1: 0.9670
Train time on cuda: 341.3470094999975
Inference Loss: 0.2549, Accuracy: 94.86%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▅▇███
test_f1,▁▆▆▇███
test_loss,█▃▃▁▁▁▁
test_precision,▁▆▆▇███
test_recall,▁▆▅▇███
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.94861
inference_loss,0.25489
test_acc,0.96739
test_f1,0.96702
test_loss,0.27248
test_precision,0.972
test_recall,0.96667
train_acc,0.94861
train_f1,0.94897


wandb: Agent Starting Run: ls6zy1xe with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009334948623108728
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6650 | Train Acc: 0.5347 | Train F1: 0.5204 | Test Loss: 0.9902 | Test Acc: 0.8073 | Test F1: 0.7863
Epoch 2/7 | Train Loss: 0.7810 | Train Acc: 0.8431 | Train F1: 0.8402 | Test Loss: 0.6086 | Test Acc: 0.9062 | Test F1: 0.9034
Epoch 3/7 | Train Loss: 0.5405 | Train Acc: 0.8917 | Train F1: 0.8924 | Test Loss: 0.4922 | Test Acc: 0.9167 | Test F1: 0.9112
Epoch 4/7 | Train Loss: 0.4504 | Train Acc: 0.9042 | Train F1: 0.9046 | Test Loss: 0.4274 | Test Acc: 0.9375 | Test F1: 0.9338
Epoch 5/7 | Train Loss: 0.4132 | Train Acc: 0.9139 | Train F1: 0.9147 | Test Loss: 0.4044 | Test Acc: 0.9479 | Test F1: 0.9449
Epoch 6/7 | Train Loss: 0.4001 | Train Acc: 0.9125 | Train F1: 0.9132 | Test Loss: 0.4155 | Test Acc: 0.9479 | Test F1: 0.9449
Epoch 7/7 | Train Loss: 0.4001 | Train Acc: 0.9125 | Train F1: 0.9132 | Test Loss: 0.3934 | Test Acc: 0.9479 | Test F1: 0.9449
Train time on cuda: 341.15109779999693
Inference Loss: 0.4002, Accuracy: 91.25%


VBox(children=(Label(value='0.005 MB of 0.005 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▆▇███
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▇▇▇███
test_recall,▁▆▆▇███
train_acc,▁▇█████
train_f1,▁▇█████

0,1
epoch,7
inference_accuracy,0.9125
inference_loss,0.40016
test_acc,0.94792
test_f1,0.94486
test_loss,0.39337
test_precision,0.95425
test_recall,0.94444
train_acc,0.9125
train_f1,0.91318


wandb: Agent Starting Run: walgim86 with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006756955773029842
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.1194 | Train Acc: 0.6583 | Train F1: 0.6585 | Test Loss: 0.6105 | Test Acc: 0.8444 | Test F1: 0.8491
Epoch 2/7 | Train Loss: 0.4572 | Train Acc: 0.8958 | Train F1: 0.8962 | Test Loss: 0.3877 | Test Acc: 0.9167 | Test F1: 0.9187
Epoch 3/7 | Train Loss: 0.3434 | Train Acc: 0.9194 | Train F1: 0.9198 | Test Loss: 0.3292 | Test Acc: 0.9444 | Test F1: 0.9448
Epoch 4/7 | Train Loss: 0.2878 | Train Acc: 0.9417 | Train F1: 0.9420 | Test Loss: 0.2952 | Test Acc: 0.9611 | Test F1: 0.9615
Epoch 5/7 | Train Loss: 0.2622 | Train Acc: 0.9500 | Train F1: 0.9501 | Test Loss: 0.2791 | Test Acc: 0.9611 | Test F1: 0.9615
Epoch 6/7 | Train Loss: 0.2495 | Train Acc: 0.9569 | Train F1: 0.9571 | Test Loss: 0.2791 | Test Acc: 0.9611 | Test F1: 0.9615
Epoch 7/7 | Train Loss: 0.2495 | Train Acc: 0.9569 | Train F1: 0.9571 | Test Loss: 0.2791 | Test Acc: 0.9611 | Test F1: 0.9615
Train time on cuda: 333.47576270000354
Inference Loss: 0.2495, Accuracy: 95.69%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▃▂▁▁▁▁
test_precision,▁▄▇████
test_recall,▁▅▇████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.95694
inference_loss,0.24955
test_acc,0.96111
test_f1,0.96153
test_loss,0.2791
test_precision,0.96861
test_recall,0.96111
train_acc,0.95694
train_f1,0.95708


wandb: Agent Starting Run: qyvwyr9k with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0008668945709425641
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.3093 | Train Acc: 0.6389 | Train F1: 0.6402 | Test Loss: 0.6804 | Test Acc: 0.8478 | Test F1: 0.8433
Epoch 2/7 | Train Loss: 0.5471 | Train Acc: 0.8611 | Train F1: 0.8602 | Test Loss: 0.4443 | Test Acc: 0.9130 | Test F1: 0.9122
Epoch 3/7 | Train Loss: 0.3955 | Train Acc: 0.9111 | Train F1: 0.9123 | Test Loss: 0.3542 | Test Acc: 0.9402 | Test F1: 0.9395
Epoch 4/7 | Train Loss: 0.3365 | Train Acc: 0.9333 | Train F1: 0.9340 | Test Loss: 0.3332 | Test Acc: 0.9348 | Test F1: 0.9345
Epoch 5/7 | Train Loss: 0.3072 | Train Acc: 0.9472 | Train F1: 0.9477 | Test Loss: 0.3099 | Test Acc: 0.9457 | Test F1: 0.9449
Epoch 6/7 | Train Loss: 0.2949 | Train Acc: 0.9500 | Train F1: 0.9502 | Test Loss: 0.3188 | Test Acc: 0.9402 | Test F1: 0.9449
Epoch 7/7 | Train Loss: 0.2949 | Train Acc: 0.9500 | Train F1: 0.9502 | Test Loss: 0.3267 | Test Acc: 0.9402 | Test F1: 0.9449
Train time on cuda: 336.41117160000067
Inference Loss: 0.2950, Accuracy: 95.00%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆█▇███
test_f1,▁▆█▇███
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▆█▇███
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.95
inference_loss,0.29498
test_acc,0.94022
test_f1,0.94486
test_loss,0.32666
test_precision,0.95046
test_recall,0.94444
train_acc,0.95
train_f1,0.95023


wandb: Agent Starting Run: eex0tnyv with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00088616162867119
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7830 | Train Acc: 0.4185 | Train F1: 0.4071 | Test Loss: 1.4697 | Test Acc: 0.6417 | Test F1: 0.6339
Epoch 2/7 | Train Loss: 1.2009 | Train Acc: 0.7527 | Train F1: 0.7453 | Test Loss: 1.0312 | Test Acc: 0.8208 | Test F1: 0.8191
Epoch 3/7 | Train Loss: 0.9007 | Train Acc: 0.8397 | Train F1: 0.8382 | Test Loss: 0.8400 | Test Acc: 0.8240 | Test F1: 0.8206
Epoch 4/7 | Train Loss: 0.7591 | Train Acc: 0.8492 | Train F1: 0.8479 | Test Loss: 0.7606 | Test Acc: 0.8490 | Test F1: 0.8555
Epoch 5/7 | Train Loss: 0.6977 | Train Acc: 0.8601 | Train F1: 0.8606 | Test Loss: 0.7117 | Test Acc: 0.8521 | Test F1: 0.8557
Epoch 6/7 | Train Loss: 0.6769 | Train Acc: 0.8655 | Train F1: 0.8668 | Test Loss: 0.7209 | Test Acc: 0.8521 | Test F1: 0.8557
Epoch 7/7 | Train Loss: 0.6714 | Train Acc: 0.8641 | Train F1: 0.8668 | Test Loss: 0.6999 | Test Acc: 0.8583 | Test F1: 0.8557
Train time on cuda: 338.5885971000025
Inference Loss: 0.6722, Accuracy: 86.67%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇▇████
test_f1,▁▇▇████
test_loss,█▄▂▂▁▁▁
test_precision,▁▇▇████
test_recall,▁▇▇████
train_acc,▁▆█████
train_f1,▁▆█████

0,1
epoch,7
inference_accuracy,0.86667
inference_loss,0.67216
test_acc,0.85833
test_f1,0.85567
test_loss,0.69987
test_precision,0.8622
test_recall,0.85556
train_acc,0.86413
train_f1,0.86681


wandb: Agent Starting Run: 1765aixf with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007674139383756299
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7240 | Train Acc: 0.4639 | Train F1: 0.4569 | Test Loss: 1.1581 | Test Acc: 0.7083 | Test F1: 0.7141
Epoch 2/7 | Train Loss: 0.9280 | Train Acc: 0.8097 | Train F1: 0.8088 | Test Loss: 0.7682 | Test Acc: 0.8385 | Test F1: 0.8603
Epoch 3/7 | Train Loss: 0.6453 | Train Acc: 0.8708 | Train F1: 0.8702 | Test Loss: 0.6052 | Test Acc: 0.8802 | Test F1: 0.8710
Epoch 4/7 | Train Loss: 0.5408 | Train Acc: 0.8861 | Train F1: 0.8868 | Test Loss: 0.5173 | Test Acc: 0.9115 | Test F1: 0.9068
Epoch 5/7 | Train Loss: 0.4942 | Train Acc: 0.8972 | Train F1: 0.8979 | Test Loss: 0.4772 | Test Acc: 0.9167 | Test F1: 0.9127
Epoch 6/7 | Train Loss: 0.4797 | Train Acc: 0.9000 | Train F1: 0.9008 | Test Loss: 0.4799 | Test Acc: 0.9167 | Test F1: 0.9127
Epoch 7/7 | Train Loss: 0.4797 | Train Acc: 0.9000 | Train F1: 0.9008 | Test Loss: 0.5002 | Test Acc: 0.9010 | Test F1: 0.9127
Train time on cuda: 338.205222200002
Inference Loss: 0.4797, Accuracy: 89.86%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇███▇
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▆████
test_recall,▁▆▇████
train_acc,▁▇█████
train_f1,▁▇█████

0,1
epoch,7
inference_accuracy,0.89861
inference_loss,0.47971
test_acc,0.90104
test_f1,0.91268
test_loss,0.50021
test_precision,0.92207
test_recall,0.91111
train_acc,0.9
train_f1,0.90077


wandb: Agent Starting Run: 10msftnj with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0003861431334019646
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.0714 | Train Acc: 0.2432 | Train F1: 0.2330 | Test Loss: 1.8172 | Test Acc: 0.4510 | Test F1: 0.3791
Epoch 2/7 | Train Loss: 1.7307 | Train Acc: 0.4932 | Train F1: 0.4290 | Test Loss: 1.5868 | Test Acc: 0.5333 | Test F1: 0.4716
Epoch 3/7 | Train Loss: 1.5195 | Train Acc: 0.6087 | Train F1: 0.5719 | Test Loss: 1.4187 | Test Acc: 0.6979 | Test F1: 0.6808
Epoch 4/7 | Train Loss: 1.3753 | Train Acc: 0.6916 | Train F1: 0.6808 | Test Loss: 1.3374 | Test Acc: 0.7573 | Test F1: 0.7606
Epoch 5/7 | Train Loss: 1.2986 | Train Acc: 0.7418 | Train F1: 0.7388 | Test Loss: 1.2851 | Test Acc: 0.7708 | Test F1: 0.7737
Epoch 6/7 | Train Loss: 1.2685 | Train Acc: 0.7609 | Train F1: 0.7587 | Test Loss: 1.2802 | Test Acc: 0.7771 | Test F1: 0.7737
Epoch 7/7 | Train Loss: 1.2732 | Train Acc: 0.7595 | Train F1: 0.7587 | Test Loss: 1.2850 | Test Acc: 0.7708 | Test F1: 0.7737
Train time on cuda: 341.07740000000194
Inference Loss: 1.2685, Accuracy: 76.11%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▃▆████
test_f1,▁▃▆████
test_loss,█▅▃▂▁▁▁
test_precision,▁▄▇████
test_recall,▁▃▆████
train_acc,▁▄▆▇███
train_f1,▁▄▆▇███

0,1
epoch,7
inference_accuracy,0.76111
inference_loss,1.26854
test_acc,0.77083
test_f1,0.77372
test_loss,1.28496
test_precision,0.79494
test_recall,0.77222
train_acc,0.75951
train_f1,0.7587


wandb: Agent Starting Run: upqmsy9n with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0001830913490921837
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8106 | Train Acc: 0.4056 | Train F1: 0.3964 | Test Loss: 1.3846 | Test Acc: 0.7111 | Test F1: 0.6802
Epoch 2/7 | Train Loss: 1.1504 | Train Acc: 0.7708 | Train F1: 0.7606 | Test Loss: 0.9968 | Test Acc: 0.8389 | Test F1: 0.8338
Epoch 3/7 | Train Loss: 0.8797 | Train Acc: 0.8444 | Train F1: 0.8434 | Test Loss: 0.8275 | Test Acc: 0.8778 | Test F1: 0.8732
Epoch 4/7 | Train Loss: 0.7528 | Train Acc: 0.8764 | Train F1: 0.8770 | Test Loss: 0.7428 | Test Acc: 0.8833 | Test F1: 0.8764
Epoch 5/7 | Train Loss: 0.6933 | Train Acc: 0.8694 | Train F1: 0.8680 | Test Loss: 0.7037 | Test Acc: 0.8889 | Test F1: 0.8853
Epoch 6/7 | Train Loss: 0.6725 | Train Acc: 0.8806 | Train F1: 0.8799 | Test Loss: 0.7037 | Test Acc: 0.8889 | Test F1: 0.8853
Epoch 7/7 | Train Loss: 0.6725 | Train Acc: 0.8806 | Train F1: 0.8799 | Test Loss: 0.7037 | Test Acc: 0.8889 | Test F1: 0.8853
Train time on cuda: 333.40959060000023
Inference Loss: 0.6725, Accuracy: 88.06%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆█████
test_f1,▁▆█████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆█████
test_recall,▁▆█████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.88056
inference_loss,0.67254
test_acc,0.88889
test_f1,0.88529
test_loss,0.70369
test_precision,0.89531
test_recall,0.88889
train_acc,0.88056
train_f1,0.87985


wandb: Agent Starting Run: chia55sl with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00029423438412206916
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8828 | Train Acc: 0.3708 | Train F1: 0.3666 | Test Loss: 1.4186 | Test Acc: 0.7011 | Test F1: 0.6651
Epoch 2/7 | Train Loss: 1.1869 | Train Acc: 0.7708 | Train F1: 0.7606 | Test Loss: 1.0138 | Test Acc: 0.8043 | Test F1: 0.7822
Epoch 3/7 | Train Loss: 0.9019 | Train Acc: 0.8250 | Train F1: 0.8214 | Test Loss: 0.8310 | Test Acc: 0.8261 | Test F1: 0.8125
Epoch 4/7 | Train Loss: 0.7709 | Train Acc: 0.8597 | Train F1: 0.8573 | Test Loss: 0.7478 | Test Acc: 0.8587 | Test F1: 0.8559
Epoch 5/7 | Train Loss: 0.7092 | Train Acc: 0.8764 | Train F1: 0.8746 | Test Loss: 0.7152 | Test Acc: 0.8641 | Test F1: 0.8560
Epoch 6/7 | Train Loss: 0.6897 | Train Acc: 0.8806 | Train F1: 0.8793 | Test Loss: 0.7133 | Test Acc: 0.8641 | Test F1: 0.8560
Epoch 7/7 | Train Loss: 0.6897 | Train Acc: 0.8806 | Train F1: 0.8793 | Test Loss: 0.7121 | Test Acc: 0.8587 | Test F1: 0.8560
Train time on cuda: 334.71113360000163
Inference Loss: 0.6897, Accuracy: 88.06%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆████
test_f1,▁▅▆████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▆████
test_recall,▁▅▆████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.88056
inference_loss,0.68975
test_acc,0.8587
test_f1,0.85603
test_loss,0.71211
test_precision,0.86879
test_recall,0.86111
train_acc,0.88056
train_f1,0.87931


wandb: Agent Starting Run: ky230ct1 with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007618486480014255
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.2562 | Train Acc: 0.6472 | Train F1: 0.6396 | Test Loss: 0.6901 | Test Acc: 0.8478 | Test F1: 0.8380
Epoch 2/7 | Train Loss: 0.5578 | Train Acc: 0.8653 | Train F1: 0.8649 | Test Loss: 0.4500 | Test Acc: 0.9076 | Test F1: 0.9068
Epoch 3/7 | Train Loss: 0.4192 | Train Acc: 0.9042 | Train F1: 0.9042 | Test Loss: 0.4111 | Test Acc: 0.9239 | Test F1: 0.9348
Epoch 4/7 | Train Loss: 0.3573 | Train Acc: 0.9139 | Train F1: 0.9147 | Test Loss: 0.3368 | Test Acc: 0.9511 | Test F1: 0.9500
Epoch 5/7 | Train Loss: 0.3274 | Train Acc: 0.9278 | Train F1: 0.9283 | Test Loss: 0.3302 | Test Acc: 0.9511 | Test F1: 0.9505
Epoch 6/7 | Train Loss: 0.3171 | Train Acc: 0.9347 | Train F1: 0.9352 | Test Loss: 0.3322 | Test Acc: 0.9511 | Test F1: 0.9505
Epoch 7/7 | Train Loss: 0.3171 | Train Acc: 0.9347 | Train F1: 0.9352 | Test Loss: 0.3380 | Test Acc: 0.9457 | Test F1: 0.9505
Train time on cuda: 333.09147870000015
Inference Loss: 0.3172, Accuracy: 93.47%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆████
test_f1,▁▅▇████
test_loss,█▃▃▁▁▁▁
test_precision,▁▅▇████
test_recall,▁▅▇████
train_acc,▁▆▇▇███
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.93472
inference_loss,0.31718
test_acc,0.94565
test_f1,0.95055
test_loss,0.33795
test_precision,0.95731
test_recall,0.95
train_acc,0.93472
train_f1,0.93524


wandb: Agent Starting Run: ofgvu6k6 with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00031579423438507796
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1509 | Train Acc: 0.1997 | Train F1: 0.1792 | Test Loss: 1.9378 | Test Acc: 0.3656 | Test F1: 0.3377
Epoch 2/7 | Train Loss: 1.8603 | Train Acc: 0.3927 | Train F1: 0.3660 | Test Loss: 1.7065 | Test Acc: 0.5427 | Test F1: 0.5328
Epoch 3/7 | Train Loss: 1.6622 | Train Acc: 0.5530 | Train F1: 0.5365 | Test Loss: 1.5555 | Test Acc: 0.6448 | Test F1: 0.6366
Epoch 4/7 | Train Loss: 1.5404 | Train Acc: 0.6413 | Train F1: 0.6323 | Test Loss: 1.4731 | Test Acc: 0.6708 | Test F1: 0.6649
Epoch 5/7 | Train Loss: 1.4686 | Train Acc: 0.6739 | Train F1: 0.6643 | Test Loss: 1.4373 | Test Acc: 0.6823 | Test F1: 0.6867
Epoch 6/7 | Train Loss: 1.4442 | Train Acc: 0.6861 | Train F1: 0.6800 | Test Loss: 1.4317 | Test Acc: 0.6917 | Test F1: 0.6867
Epoch 7/7 | Train Loss: 1.4452 | Train Acc: 0.6861 | Train F1: 0.6800 | Test Loss: 1.4374 | Test Acc: 0.6854 | Test F1: 0.6867
Train time on cuda: 336.08404940000037
Inference Loss: 1.4411, Accuracy: 68.89%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▅▃▂▁▁▁
test_precision,▁▆▇████
test_recall,▁▅▇████
train_acc,▁▄▆▇███
train_f1,▁▄▆▇███

0,1
epoch,7
inference_accuracy,0.68889
inference_loss,1.4411
test_acc,0.68542
test_f1,0.68674
test_loss,1.43745
test_precision,0.69432
test_recall,0.69444
train_acc,0.68614
train_f1,0.68004


wandb: Agent Starting Run: gq3s98df with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0002845877918874019
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6225 | Train Acc: 0.5389 | Train F1: 0.5312 | Test Loss: 1.0739 | Test Acc: 0.8222 | Test F1: 0.8135
Epoch 2/7 | Train Loss: 0.8731 | Train Acc: 0.8333 | Train F1: 0.8309 | Test Loss: 0.7151 | Test Acc: 0.8833 | Test F1: 0.8807
Epoch 3/7 | Train Loss: 0.6447 | Train Acc: 0.8694 | Train F1: 0.8694 | Test Loss: 0.5894 | Test Acc: 0.9111 | Test F1: 0.9130
Epoch 4/7 | Train Loss: 0.5503 | Train Acc: 0.8847 | Train F1: 0.8849 | Test Loss: 0.5270 | Test Acc: 0.9278 | Test F1: 0.9290
Epoch 5/7 | Train Loss: 0.5069 | Train Acc: 0.8917 | Train F1: 0.8922 | Test Loss: 0.4971 | Test Acc: 0.9278 | Test F1: 0.9290
Epoch 6/7 | Train Loss: 0.4900 | Train Acc: 0.8972 | Train F1: 0.8977 | Test Loss: 0.4971 | Test Acc: 0.9278 | Test F1: 0.9290
Epoch 7/7 | Train Loss: 0.4900 | Train Acc: 0.8972 | Train F1: 0.8977 | Test Loss: 0.4971 | Test Acc: 0.9278 | Test F1: 0.9290
Train time on cuda: 330.7929172000004
Inference Loss: 0.4900, Accuracy: 89.58%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▇████
test_recall,▁▅▇████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.89583
inference_loss,0.49001
test_acc,0.92778
test_f1,0.92897
test_loss,0.49714
test_precision,0.93899
test_recall,0.92778
train_acc,0.89722
train_f1,0.89767


wandb: Agent Starting Run: 1m1uox1g with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007858146313717788
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.3437 | Train Acc: 0.5903 | Train F1: 0.5769 | Test Loss: 0.7592 | Test Acc: 0.8098 | Test F1: 0.8095
Epoch 2/7 | Train Loss: 0.5928 | Train Acc: 0.8472 | Train F1: 0.8472 | Test Loss: 0.4869 | Test Acc: 0.8913 | Test F1: 0.8883
Epoch 3/7 | Train Loss: 0.4303 | Train Acc: 0.8931 | Train F1: 0.8935 | Test Loss: 0.4036 | Test Acc: 0.9076 | Test F1: 0.9089
Epoch 4/7 | Train Loss: 0.3666 | Train Acc: 0.9181 | Train F1: 0.9185 | Test Loss: 0.3565 | Test Acc: 0.9402 | Test F1: 0.9420
Epoch 5/7 | Train Loss: 0.3322 | Train Acc: 0.9292 | Train F1: 0.9296 | Test Loss: 0.3420 | Test Acc: 0.9402 | Test F1: 0.9420
Epoch 6/7 | Train Loss: 0.3210 | Train Acc: 0.9319 | Train F1: 0.9324 | Test Loss: 0.3545 | Test Acc: 0.9348 | Test F1: 0.9420
Epoch 7/7 | Train Loss: 0.3210 | Train Acc: 0.9319 | Train F1: 0.9324 | Test Loss: 0.3390 | Test Acc: 0.9402 | Test F1: 0.9420
Train time on cuda: 333.61773449999964
Inference Loss: 0.3210, Accuracy: 93.19%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆████
test_f1,▁▅▆████
test_loss,█▃▂▁▁▁▁
test_precision,▁▄▅████
test_recall,▁▅▆████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.93194
inference_loss,0.321
test_acc,0.94022
test_f1,0.942
test_loss,0.33903
test_precision,0.95636
test_recall,0.93889
train_acc,0.93194
train_f1,0.93236


wandb: Agent Starting Run: cje41arc with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007200071469892749
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7988 | Train Acc: 0.4321 | Train F1: 0.4346 | Test Loss: 1.5632 | Test Acc: 0.5687 | Test F1: 0.5143
Epoch 2/7 | Train Loss: 1.3419 | Train Acc: 0.6875 | Train F1: 0.6504 | Test Loss: 1.1932 | Test Acc: 0.7219 | Test F1: 0.6726
Epoch 3/7 | Train Loss: 1.0609 | Train Acc: 0.7880 | Train F1: 0.7698 | Test Loss: 1.0011 | Test Acc: 0.7823 | Test F1: 0.7666
Epoch 4/7 | Train Loss: 0.9120 | Train Acc: 0.8261 | Train F1: 0.8234 | Test Loss: 0.9135 | Test Acc: 0.7885 | Test F1: 0.7845
Epoch 5/7 | Train Loss: 0.8481 | Train Acc: 0.8519 | Train F1: 0.8524 | Test Loss: 0.8584 | Test Acc: 0.8052 | Test F1: 0.7961
Epoch 6/7 | Train Loss: 0.8186 | Train Acc: 0.8546 | Train F1: 0.8489 | Test Loss: 0.8610 | Test Acc: 0.8115 | Test F1: 0.7961
Epoch 7/7 | Train Loss: 0.8205 | Train Acc: 0.8492 | Train F1: 0.8489 | Test Loss: 0.8547 | Test Acc: 0.8083 | Test F1: 0.7961
Train time on cuda: 335.81866509999963
Inference Loss: 0.8182, Accuracy: 85.14%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇▇███
test_f1,▁▅▇████
test_loss,█▄▂▂▁▁▁
test_precision,▁▄▇████
test_recall,▁▅▇████
train_acc,▁▅▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.85139
inference_loss,0.81821
test_acc,0.80833
test_f1,0.79606
test_loss,0.85469
test_precision,0.80834
test_recall,0.80556
train_acc,0.84918
train_f1,0.8489


wandb: Agent Starting Run: 5klty6o3 with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00019933607119239135
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8561 | Train Acc: 0.4319 | Train F1: 0.3981 | Test Loss: 1.5484 | Test Acc: 0.6630 | Test F1: 0.6450
Epoch 2/7 | Train Loss: 1.3509 | Train Acc: 0.7278 | Train F1: 0.7222 | Test Loss: 1.1996 | Test Acc: 0.7772 | Test F1: 0.7728
Epoch 3/7 | Train Loss: 1.0930 | Train Acc: 0.7986 | Train F1: 0.7917 | Test Loss: 1.0222 | Test Acc: 0.8207 | Test F1: 0.8127
Epoch 4/7 | Train Loss: 0.9579 | Train Acc: 0.8250 | Train F1: 0.8189 | Test Loss: 0.9274 | Test Acc: 0.8424 | Test F1: 0.8380
Epoch 5/7 | Train Loss: 0.8921 | Train Acc: 0.8347 | Train F1: 0.8302 | Test Loss: 0.8926 | Test Acc: 0.8478 | Test F1: 0.8431
Epoch 6/7 | Train Loss: 0.8707 | Train Acc: 0.8403 | Train F1: 0.8369 | Test Loss: 0.8885 | Test Acc: 0.8533 | Test F1: 0.8431
Epoch 7/7 | Train Loss: 0.8707 | Train Acc: 0.8403 | Train F1: 0.8369 | Test Loss: 0.8873 | Test Acc: 0.8533 | Test F1: 0.8431
Train time on cuda: 333.478225400002
Inference Loss: 0.8707, Accuracy: 84.03%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▅▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.84028
inference_loss,0.87069
test_acc,0.85326
test_f1,0.84309
test_loss,0.88726
test_precision,0.86183
test_recall,0.85
train_acc,0.84028
train_f1,0.83688


wandb: Agent Starting Run: wlcdsk1b with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000989043658710982
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.2408 | Train Acc: 0.6250 | Train F1: 0.6198 | Test Loss: 0.6199 | Test Acc: 0.8750 | Test F1: 0.8776
Epoch 2/7 | Train Loss: 0.5046 | Train Acc: 0.8653 | Train F1: 0.8654 | Test Loss: 0.3820 | Test Acc: 0.9348 | Test F1: 0.9353
Epoch 3/7 | Train Loss: 0.3687 | Train Acc: 0.9153 | Train F1: 0.9156 | Test Loss: 0.3151 | Test Acc: 0.9402 | Test F1: 0.9403
Epoch 4/7 | Train Loss: 0.3114 | Train Acc: 0.9250 | Train F1: 0.9252 | Test Loss: 0.2872 | Test Acc: 0.9674 | Test F1: 0.9682
Epoch 5/7 | Train Loss: 0.2816 | Train Acc: 0.9347 | Train F1: 0.9350 | Test Loss: 0.2809 | Test Acc: 0.9565 | Test F1: 0.9626
Epoch 6/7 | Train Loss: 0.2714 | Train Acc: 0.9417 | Train F1: 0.9418 | Test Loss: 0.2734 | Test Acc: 0.9620 | Test F1: 0.9626
Epoch 7/7 | Train Loss: 0.2714 | Train Acc: 0.9417 | Train F1: 0.9418 | Test Loss: 0.2708 | Test Acc: 0.9620 | Test F1: 0.9626
Train time on cuda: 333.6214375999989
Inference Loss: 0.2714, Accuracy: 94.17%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▆█▇██
test_f1,▁▅▆████
test_loss,█▃▂▁▁▁▁
test_precision,▁▅▅█▇▇▇
test_recall,▁▅▆████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.94167
inference_loss,0.27142
test_acc,0.96196
test_f1,0.9626
test_loss,0.2708
test_precision,0.96944
test_recall,0.96111
train_acc,0.94167
train_f1,0.94185


wandb: Agent Starting Run: vkh1g2jw with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0003815161494612449
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8273 | Train Acc: 0.4222 | Train F1: 0.4305 | Test Loss: 1.3086 | Test Acc: 0.6739 | Test F1: 0.6266
Epoch 2/7 | Train Loss: 1.0407 | Train Acc: 0.7875 | Train F1: 0.7746 | Test Loss: 0.8774 | Test Acc: 0.8424 | Test F1: 0.8471
Epoch 3/7 | Train Loss: 0.7570 | Train Acc: 0.8736 | Train F1: 0.8741 | Test Loss: 0.6956 | Test Acc: 0.8859 | Test F1: 0.8827
Epoch 4/7 | Train Loss: 0.6437 | Train Acc: 0.8792 | Train F1: 0.8785 | Test Loss: 0.6237 | Test Acc: 0.9076 | Test F1: 0.9044
Epoch 5/7 | Train Loss: 0.5889 | Train Acc: 0.8972 | Train F1: 0.8975 | Test Loss: 0.5938 | Test Acc: 0.9185 | Test F1: 0.9175
Epoch 6/7 | Train Loss: 0.5717 | Train Acc: 0.9014 | Train F1: 0.9018 | Test Loss: 0.6080 | Test Acc: 0.9130 | Test F1: 0.9175
Epoch 7/7 | Train Loss: 0.5717 | Train Acc: 0.9014 | Train F1: 0.9018 | Test Loss: 0.6094 | Test Acc: 0.9130 | Test F1: 0.9175
Train time on cuda: 333.8924404999998
Inference Loss: 0.5718, Accuracy: 90.14%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇████
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▇▇████
test_recall,▁▆▇████
train_acc,▁▆█████
train_f1,▁▆█████

0,1
epoch,7
inference_accuracy,0.90139
inference_loss,0.57176
test_acc,0.91304
test_f1,0.91746
test_loss,0.60938
test_precision,0.92536
test_recall,0.91667
train_acc,0.90139
train_f1,0.90177


wandb: Agent Starting Run: yzdof8x5 with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009281918326128488
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.0150 | Train Acc: 0.2989 | Train F1: 0.2940 | Test Loss: 1.6250 | Test Acc: 0.5094 | Test F1: 0.4791
Epoch 2/7 | Train Loss: 1.3600 | Train Acc: 0.6196 | Train F1: 0.5829 | Test Loss: 1.1503 | Test Acc: 0.7333 | Test F1: 0.7094
Epoch 3/7 | Train Loss: 0.9965 | Train Acc: 0.7785 | Train F1: 0.7717 | Test Loss: 0.9282 | Test Acc: 0.8344 | Test F1: 0.8271
Epoch 4/7 | Train Loss: 0.8286 | Train Acc: 0.8505 | Train F1: 0.8487 | Test Loss: 0.8059 | Test Acc: 0.8771 | Test F1: 0.8731
Epoch 5/7 | Train Loss: 0.7434 | Train Acc: 0.8682 | Train F1: 0.8669 | Test Loss: 0.7512 | Test Acc: 0.8844 | Test F1: 0.8843
Epoch 6/7 | Train Loss: 0.7216 | Train Acc: 0.8723 | Train F1: 0.8739 | Test Loss: 0.7702 | Test Acc: 0.8781 | Test F1: 0.8843
Epoch 7/7 | Train Loss: 0.7228 | Train Acc: 0.8723 | Train F1: 0.8739 | Test Loss: 0.7503 | Test Acc: 0.8844 | Test F1: 0.8843
Train time on cuda: 335.9254715999996
Inference Loss: 0.7227, Accuracy: 87.36%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▅▇████
train_acc,▁▅▇████
train_f1,▁▄▇████

0,1
epoch,7
inference_accuracy,0.87361
inference_loss,0.72272
test_acc,0.88438
test_f1,0.88426
test_loss,0.75026
test_precision,0.8936
test_recall,0.88333
train_acc,0.87228
train_f1,0.87385


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: d555gzy2 with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009588784535949144
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7149 | Train Acc: 0.4347 | Train F1: 0.4363 | Test Loss: 1.1019 | Test Acc: 0.7552 | Test F1: 0.7301
Epoch 2/7 | Train Loss: 0.8187 | Train Acc: 0.8097 | Train F1: 0.8032 | Test Loss: 0.6463 | Test Acc: 0.8958 | Test F1: 0.9074
Epoch 3/7 | Train Loss: 0.5547 | Train Acc: 0.8764 | Train F1: 0.8762 | Test Loss: 0.4822 | Test Acc: 0.9167 | Test F1: 0.9110
Epoch 4/7 | Train Loss: 0.4664 | Train Acc: 0.8917 | Train F1: 0.8920 | Test Loss: 0.4337 | Test Acc: 0.9167 | Test F1: 0.9129
Epoch 5/7 | Train Loss: 0.4242 | Train Acc: 0.9056 | Train F1: 0.9063 | Test Loss: 0.4365 | Test Acc: 0.9167 | Test F1: 0.9283
Epoch 6/7 | Train Loss: 0.4120 | Train Acc: 0.9097 | Train F1: 0.9105 | Test Loss: 0.4136 | Test Acc: 0.9323 | Test F1: 0.9283
Epoch 7/7 | Train Loss: 0.4120 | Train Acc: 0.9097 | Train F1: 0.9105 | Test Loss: 0.4294 | Test Acc: 0.9167 | Test F1: 0.9283
Train time on cuda: 334.30387300000075
Inference Loss: 0.4120, Accuracy: 90.97%


VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇▇▇▇█▇
test_f1,▁▇▇▇███
test_loss,█▃▂▁▁▁▁
test_precision,▁▇▇▇███
test_recall,▁▇▇▇███
train_acc,▁▇█████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.90972
inference_loss,0.41203
test_acc,0.91667
test_f1,0.9283
test_loss,0.42936
test_precision,0.93684
test_recall,0.92778
train_acc,0.90972
train_f1,0.91048


wandb: Agent Starting Run: 2mr8r028 with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0004340808295440927
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.3980 | Train Acc: 0.6153 | Train F1: 0.6068 | Test Loss: 0.7994 | Test Acc: 0.8167 | Test F1: 0.8079
Epoch 2/7 | Train Loss: 0.6548 | Train Acc: 0.8556 | Train F1: 0.8536 | Test Loss: 0.5225 | Test Acc: 0.8944 | Test F1: 0.8963
Epoch 3/7 | Train Loss: 0.4850 | Train Acc: 0.9056 | Train F1: 0.9060 | Test Loss: 0.4360 | Test Acc: 0.9167 | Test F1: 0.9163
Epoch 4/7 | Train Loss: 0.4094 | Train Acc: 0.9208 | Train F1: 0.9210 | Test Loss: 0.3854 | Test Acc: 0.9278 | Test F1: 0.9287
Epoch 5/7 | Train Loss: 0.3752 | Train Acc: 0.9250 | Train F1: 0.9249 | Test Loss: 0.3669 | Test Acc: 0.9389 | Test F1: 0.9398
Epoch 6/7 | Train Loss: 0.3615 | Train Acc: 0.9278 | Train F1: 0.9277 | Test Loss: 0.3669 | Test Acc: 0.9389 | Test F1: 0.9398
Epoch 7/7 | Train Loss: 0.3615 | Train Acc: 0.9278 | Train F1: 0.9277 | Test Loss: 0.3669 | Test Acc: 0.9389 | Test F1: 0.9398
Train time on cuda: 330.41485309999916
Inference Loss: 0.3616, Accuracy: 92.92%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇▇███
test_f1,▁▆▇▇███
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▅▇▇███
train_acc,▁▆█████
train_f1,▁▆█████

0,1
epoch,7
inference_accuracy,0.92917
inference_loss,0.36155
test_acc,0.93889
test_f1,0.93977
test_loss,0.36692
test_precision,0.94642
test_recall,0.93889
train_acc,0.92778
train_f1,0.92766


wandb: Agent Starting Run: 4l2ndbmi with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0003607705143919802
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.9835 | Train Acc: 0.3444 | Train F1: 0.3404 | Test Loss: 1.6064 | Test Acc: 0.5781 | Test F1: 0.5555
Epoch 2/7 | Train Loss: 1.4236 | Train Acc: 0.6708 | Train F1: 0.6530 | Test Loss: 1.2201 | Test Acc: 0.7500 | Test F1: 0.7651
Epoch 3/7 | Train Loss: 1.1258 | Train Acc: 0.7819 | Train F1: 0.7708 | Test Loss: 0.9952 | Test Acc: 0.8333 | Test F1: 0.8185
Epoch 4/7 | Train Loss: 0.9687 | Train Acc: 0.8208 | Train F1: 0.8145 | Test Loss: 0.9051 | Test Acc: 0.8594 | Test F1: 0.8490
Epoch 5/7 | Train Loss: 0.8914 | Train Acc: 0.8500 | Train F1: 0.8476 | Test Loss: 0.8638 | Test Acc: 0.8281 | Test F1: 0.8490
Epoch 6/7 | Train Loss: 0.8681 | Train Acc: 0.8542 | Train F1: 0.8520 | Test Loss: 0.8374 | Test Acc: 0.8594 | Test F1: 0.8490
Epoch 7/7 | Train Loss: 0.8681 | Train Acc: 0.8542 | Train F1: 0.8520 | Test Loss: 0.8601 | Test Acc: 0.8594 | Test F1: 0.8490
Train time on cuda: 334.42929189999995
Inference Loss: 0.8681, Accuracy: 85.42%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇█▇██
test_f1,▁▆▇████
test_loss,█▄▂▂▁▁▁
test_precision,▁▆▇████
test_recall,▁▆▇████
train_acc,▁▅▇████
train_f1,▁▅▇▇███

0,1
epoch,7
inference_accuracy,0.85417
inference_loss,0.8681
test_acc,0.85938
test_f1,0.84901
test_loss,0.86006
test_precision,0.86538
test_recall,0.85
train_acc,0.85417
train_f1,0.85204


wandb: Agent Starting Run: bk4e6til with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006469176531128905
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8939 | Train Acc: 0.3492 | Train F1: 0.3286 | Test Loss: 1.6398 | Test Acc: 0.5042 | Test F1: 0.4582
Epoch 2/7 | Train Loss: 1.4160 | Train Acc: 0.6753 | Train F1: 0.6641 | Test Loss: 1.2602 | Test Acc: 0.7344 | Test F1: 0.7188
Epoch 3/7 | Train Loss: 1.1379 | Train Acc: 0.7812 | Train F1: 0.7752 | Test Loss: 1.0823 | Test Acc: 0.7656 | Test F1: 0.7474
Epoch 4/7 | Train Loss: 0.9788 | Train Acc: 0.7908 | Train F1: 0.7848 | Test Loss: 0.9738 | Test Acc: 0.8052 | Test F1: 0.7910
Epoch 5/7 | Train Loss: 0.9036 | Train Acc: 0.8084 | Train F1: 0.7992 | Test Loss: 0.9235 | Test Acc: 0.8344 | Test F1: 0.8270
Epoch 6/7 | Train Loss: 0.8807 | Train Acc: 0.8125 | Train F1: 0.8065 | Test Loss: 0.9375 | Test Acc: 0.8250 | Test F1: 0.8270
Epoch 7/7 | Train Loss: 0.8807 | Train Acc: 0.8139 | Train F1: 0.8065 | Test Loss: 0.9277 | Test Acc: 0.8312 | Test F1: 0.8270
Train time on cuda: 335.8245148999995
Inference Loss: 0.8782, Accuracy: 81.39%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇▇███
test_f1,▁▆▆▇███
test_loss,█▄▃▁▁▁▁
test_precision,▁▆▇▇███
test_recall,▁▆▇▇███
train_acc,▁▆█████
train_f1,▁▆█████

0,1
epoch,7
inference_accuracy,0.81389
inference_loss,0.87815
test_acc,0.83125
test_f1,0.82699
test_loss,0.92768
test_precision,0.8403
test_recall,0.83333
train_acc,0.81386
train_f1,0.80649


wandb: Agent Starting Run: d3atwevv with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006490417239497124
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.5459 | Train Acc: 0.5347 | Train F1: 0.5304 | Test Loss: 0.9989 | Test Acc: 0.7772 | Test F1: 0.7459
Epoch 2/7 | Train Loss: 0.7258 | Train Acc: 0.8222 | Train F1: 0.8172 | Test Loss: 0.6120 | Test Acc: 0.8804 | Test F1: 0.8864
Epoch 3/7 | Train Loss: 0.5149 | Train Acc: 0.8833 | Train F1: 0.8847 | Test Loss: 0.4766 | Test Acc: 0.9130 | Test F1: 0.9178
Epoch 4/7 | Train Loss: 0.4317 | Train Acc: 0.9083 | Train F1: 0.9092 | Test Loss: 0.4321 | Test Acc: 0.9293 | Test F1: 0.9296
Epoch 5/7 | Train Loss: 0.3987 | Train Acc: 0.9097 | Train F1: 0.9106 | Test Loss: 0.4294 | Test Acc: 0.9185 | Test F1: 0.9240
Epoch 6/7 | Train Loss: 0.3854 | Train Acc: 0.9181 | Train F1: 0.9193 | Test Loss: 0.4342 | Test Acc: 0.9130 | Test F1: 0.9240
Epoch 7/7 | Train Loss: 0.3854 | Train Acc: 0.9181 | Train F1: 0.9193 | Test Loss: 0.4282 | Test Acc: 0.9185 | Test F1: 0.9240
Train time on cuda: 333.57674419999967
Inference Loss: 0.3854, Accuracy: 91.81%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇█▇▇▇
test_f1,▁▆█████
test_loss,█▃▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.91806
inference_loss,0.38544
test_acc,0.91848
test_f1,0.92404
test_loss,0.42817
test_precision,0.93439
test_recall,0.92222
train_acc,0.91806
train_f1,0.91926


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: ivp2156z with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0006725051858322345
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.4057 | Train Acc: 0.5861 | Train F1: 0.5889 | Test Loss: 0.8318 | Test Acc: 0.8315 | Test F1: 0.8262
Epoch 2/7 | Train Loss: 0.6297 | Train Acc: 0.8708 | Train F1: 0.8702 | Test Loss: 0.5131 | Test Acc: 0.8967 | Test F1: 0.8946
Epoch 3/7 | Train Loss: 0.4611 | Train Acc: 0.9097 | Train F1: 0.9101 | Test Loss: 0.4324 | Test Acc: 0.9130 | Test F1: 0.9151
Epoch 4/7 | Train Loss: 0.3933 | Train Acc: 0.9222 | Train F1: 0.9233 | Test Loss: 0.3800 | Test Acc: 0.9402 | Test F1: 0.9398
Epoch 5/7 | Train Loss: 0.3641 | Train Acc: 0.9236 | Train F1: 0.9241 | Test Loss: 0.3640 | Test Acc: 0.9565 | Test F1: 0.9564
Epoch 6/7 | Train Loss: 0.3503 | Train Acc: 0.9333 | Train F1: 0.9339 | Test Loss: 0.3629 | Test Acc: 0.9565 | Test F1: 0.9564
Epoch 7/7 | Train Loss: 0.3503 | Train Acc: 0.9333 | Train F1: 0.9339 | Test Loss: 0.3895 | Test Acc: 0.9511 | Test F1: 0.9564
Train time on cuda: 333.16038670000125
Inference Loss: 0.3503, Accuracy: 93.33%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆▇███
test_f1,▁▅▆▇███
test_loss,█▃▂▁▁▁▁
test_precision,▁▄▆▇███
test_recall,▁▅▆▇███
train_acc,▁▇█████
train_f1,▁▇█████

0,1
epoch,7
inference_accuracy,0.93333
inference_loss,0.35031
test_acc,0.95109
test_f1,0.95636
test_loss,0.38949
test_precision,0.96141
test_recall,0.95556
train_acc,0.93333
train_f1,0.93386


wandb: Agent Starting Run: 7zbaz7ro with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007158758491460374
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1183 | Train Acc: 0.2880 | Train F1: 0.2801 | Test Loss: 1.7542 | Test Acc: 0.4656 | Test F1: 0.4674
Epoch 2/7 | Train Loss: 1.5444 | Train Acc: 0.5965 | Train F1: 0.5785 | Test Loss: 1.3627 | Test Acc: 0.7031 | Test F1: 0.6879
Epoch 3/7 | Train Loss: 1.2135 | Train Acc: 0.7255 | Train F1: 0.7156 | Test Loss: 1.1267 | Test Acc: 0.8365 | Test F1: 0.8340
Epoch 4/7 | Train Loss: 1.0334 | Train Acc: 0.8139 | Train F1: 0.8083 | Test Loss: 1.0053 | Test Acc: 0.8521 | Test F1: 0.8537
Epoch 5/7 | Train Loss: 0.9500 | Train Acc: 0.8247 | Train F1: 0.8204 | Test Loss: 0.9461 | Test Acc: 0.8688 | Test F1: 0.8651
Epoch 6/7 | Train Loss: 0.9215 | Train Acc: 0.8383 | Train F1: 0.8365 | Test Loss: 0.9635 | Test Acc: 0.8625 | Test F1: 0.8651
Epoch 7/7 | Train Loss: 0.9242 | Train Acc: 0.8370 | Train F1: 0.8365 | Test Loss: 0.9505 | Test Acc: 0.8625 | Test F1: 0.8651
Train time on cuda: 336.0850611999995
Inference Loss: 0.9198, Accuracy: 83.75%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▅▃▂▁▁▁
test_precision,▁▅▇████
test_recall,▁▅█████
train_acc,▁▅▇████
train_f1,▁▅▆████

0,1
epoch,7
inference_accuracy,0.8375
inference_loss,0.91984
test_acc,0.8625
test_f1,0.86508
test_loss,0.9505
test_precision,0.86747
test_recall,0.86667
train_acc,0.83696
train_f1,0.83652


wandb: Agent Starting Run: zc1fbkex with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0008739049866466395
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6181 | Train Acc: 0.4750 | Train F1: 0.4677 | Test Loss: 1.1072 | Test Acc: 0.7188 | Test F1: 0.6907
Epoch 2/7 | Train Loss: 0.8112 | Train Acc: 0.8208 | Train F1: 0.8152 | Test Loss: 0.6342 | Test Acc: 0.8906 | Test F1: 0.8855
Epoch 3/7 | Train Loss: 0.5614 | Train Acc: 0.8792 | Train F1: 0.8793 | Test Loss: 0.5117 | Test Acc: 0.9219 | Test F1: 0.9180
Epoch 4/7 | Train Loss: 0.4733 | Train Acc: 0.9000 | Train F1: 0.9005 | Test Loss: 0.5229 | Test Acc: 0.9010 | Test F1: 0.9292
Epoch 5/7 | Train Loss: 0.4342 | Train Acc: 0.9056 | Train F1: 0.9062 | Test Loss: 0.4314 | Test Acc: 0.9323 | Test F1: 0.9292
Epoch 6/7 | Train Loss: 0.4225 | Train Acc: 0.9083 | Train F1: 0.9089 | Test Loss: 0.4496 | Test Acc: 0.9167 | Test F1: 0.9292
Epoch 7/7 | Train Loss: 0.4225 | Train Acc: 0.9083 | Train F1: 0.9089 | Test Loss: 0.4240 | Test Acc: 0.9323 | Test F1: 0.9292
Train time on cuda: 334.7962927999979
Inference Loss: 0.4226, Accuracy: 90.83%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇█▇█▇█
test_f1,▁▇█████
test_loss,█▃▂▂▁▁▁
test_precision,▁▅█████
test_recall,▁▆█████
train_acc,▁▇█████
train_f1,▁▇█████

0,1
epoch,7
inference_accuracy,0.90833
inference_loss,0.4226
test_acc,0.93229
test_f1,0.92917
test_loss,0.42401
test_precision,0.9365
test_recall,0.92778
train_acc,0.90833
train_f1,0.90892


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: iour4k1w with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000700349804839871
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.9870 | Train Acc: 0.3560 | Train F1: 0.3449 | Test Loss: 1.6516 | Test Acc: 0.5833 | Test F1: 0.5434
Epoch 2/7 | Train Loss: 1.4298 | Train Acc: 0.6861 | Train F1: 0.6617 | Test Loss: 1.2304 | Test Acc: 0.7865 | Test F1: 0.7789
Epoch 3/7 | Train Loss: 1.1249 | Train Acc: 0.7853 | Train F1: 0.7788 | Test Loss: 1.0139 | Test Acc: 0.8240 | Test F1: 0.8143
Epoch 4/7 | Train Loss: 0.9611 | Train Acc: 0.8152 | Train F1: 0.8109 | Test Loss: 0.8982 | Test Acc: 0.8635 | Test F1: 0.8551
Epoch 5/7 | Train Loss: 0.8786 | Train Acc: 0.8315 | Train F1: 0.8253 | Test Loss: 0.8630 | Test Acc: 0.8646 | Test F1: 0.8658
Epoch 6/7 | Train Loss: 0.8561 | Train Acc: 0.8342 | Train F1: 0.8293 | Test Loss: 0.8500 | Test Acc: 0.8708 | Test F1: 0.8658
Epoch 7/7 | Train Loss: 0.8557 | Train Acc: 0.8342 | Train F1: 0.8293 | Test Loss: 0.8723 | Test Acc: 0.8646 | Test F1: 0.8658
Train time on cuda: 335.6178577000028
Inference Loss: 0.8607, Accuracy: 83.47%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇████
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▆████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.83472
inference_loss,0.86071
test_acc,0.86458
test_f1,0.8658
test_loss,0.87231
test_precision,0.87657
test_recall,0.87222
train_acc,0.83424
train_f1,0.82932


wandb: Agent Starting Run: 6k8g2gky with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0001211829966965244
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.2284 | Train Acc: 0.1291 | Train F1: 0.1030 | Test Loss: 2.1582 | Test Acc: 0.1375 | Test F1: 0.1066
Epoch 2/7 | Train Loss: 2.1016 | Train Acc: 0.1766 | Train F1: 0.1673 | Test Loss: 2.0622 | Test Acc: 0.1687 | Test F1: 0.1667
Epoch 3/7 | Train Loss: 2.0089 | Train Acc: 0.2296 | Train F1: 0.2278 | Test Loss: 1.9902 | Test Acc: 0.2240 | Test F1: 0.2294
Epoch 4/7 | Train Loss: 1.9446 | Train Acc: 0.2962 | Train F1: 0.2932 | Test Loss: 1.9480 | Test Acc: 0.2437 | Test F1: 0.2507
Epoch 5/7 | Train Loss: 1.9116 | Train Acc: 0.3247 | Train F1: 0.3313 | Test Loss: 1.9267 | Test Acc: 0.2729 | Test F1: 0.2755
Epoch 6/7 | Train Loss: 1.8981 | Train Acc: 0.3505 | Train F1: 0.3487 | Test Loss: 1.9288 | Test Acc: 0.2729 | Test F1: 0.2755
Epoch 7/7 | Train Loss: 1.9013 | Train Acc: 0.3410 | Train F1: 0.3487 | Test Loss: 1.9300 | Test Acc: 0.2792 | Test F1: 0.2755
Train time on cuda: 335.6797214000035
Inference Loss: 1.8993, Accuracy: 34.31%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▃▅▆███
test_f1,▁▃▆▇███
test_loss,█▅▃▂▁▁▁
test_precision,▁▅▇▇███
test_recall,▁▃▅▇███
train_acc,▁▃▄▆▇██
train_f1,▁▃▅▆███

0,1
epoch,7
inference_accuracy,0.34306
inference_loss,1.8993
test_acc,0.27917
test_f1,0.27546
test_loss,1.93
test_precision,0.30898
test_recall,0.27778
train_acc,0.34103
train_f1,0.34869


wandb: Agent Starting Run: w56e4zlv with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00045794194663201606
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.9391 | Train Acc: 0.3681 | Train F1: 0.3632 | Test Loss: 1.5011 | Test Acc: 0.5781 | Test F1: 0.5461
Epoch 2/7 | Train Loss: 1.2846 | Train Acc: 0.7042 | Train F1: 0.6986 | Test Loss: 1.1248 | Test Acc: 0.7448 | Test F1: 0.7362
Epoch 3/7 | Train Loss: 0.9787 | Train Acc: 0.7917 | Train F1: 0.7808 | Test Loss: 0.9012 | Test Acc: 0.8385 | Test F1: 0.8373
Epoch 4/7 | Train Loss: 0.8306 | Train Acc: 0.8333 | Train F1: 0.8288 | Test Loss: 0.7935 | Test Acc: 0.8646 | Test F1: 0.8532
Epoch 5/7 | Train Loss: 0.7646 | Train Acc: 0.8583 | Train F1: 0.8566 | Test Loss: 0.7611 | Test Acc: 0.8854 | Test F1: 0.8765
Epoch 6/7 | Train Loss: 0.7424 | Train Acc: 0.8625 | Train F1: 0.8610 | Test Loss: 0.7724 | Test Acc: 0.8698 | Test F1: 0.8765
Epoch 7/7 | Train Loss: 0.7424 | Train Acc: 0.8625 | Train F1: 0.8610 | Test Loss: 0.7616 | Test Acc: 0.8698 | Test F1: 0.8765
Train time on cuda: 334.8062076000024
Inference Loss: 0.7425, Accuracy: 86.25%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▄▇▇███
test_recall,▁▅▇▇███
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.8625
inference_loss,0.74254
test_acc,0.86979
test_f1,0.87653
test_loss,0.76162
test_precision,0.88584
test_recall,0.87778
train_acc,0.8625
train_f1,0.861


wandb: Agent Starting Run: ao6slgbx with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0004844083334286455
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.0275 | Train Acc: 0.2792 | Train F1: 0.2866 | Test Loss: 1.5753 | Test Acc: 0.5156 | Test F1: 0.4880
Epoch 2/7 | Train Loss: 1.3460 | Train Acc: 0.6444 | Train F1: 0.6209 | Test Loss: 1.1319 | Test Acc: 0.7708 | Test F1: 0.7642
Epoch 3/7 | Train Loss: 1.0141 | Train Acc: 0.7847 | Train F1: 0.7803 | Test Loss: 0.9185 | Test Acc: 0.8542 | Test F1: 0.8397
Epoch 4/7 | Train Loss: 0.8518 | Train Acc: 0.8417 | Train F1: 0.8396 | Test Loss: 0.7886 | Test Acc: 0.8750 | Test F1: 0.8642
Epoch 5/7 | Train Loss: 0.7780 | Train Acc: 0.8583 | Train F1: 0.8578 | Test Loss: 0.7564 | Test Acc: 0.8750 | Test F1: 0.8642
Epoch 6/7 | Train Loss: 0.7544 | Train Acc: 0.8639 | Train F1: 0.8633 | Test Loss: 0.7866 | Test Acc: 0.8594 | Test F1: 0.8642
Epoch 7/7 | Train Loss: 0.7544 | Train Acc: 0.8639 | Train F1: 0.8633 | Test Loss: 0.7732 | Test Acc: 0.8750 | Test F1: 0.8642
Train time on cuda: 334.8335837000013
Inference Loss: 0.7545, Accuracy: 86.39%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆█████
test_f1,▁▆█████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆█████
test_recall,▁▆█████
train_acc,▁▅▇████
train_f1,▁▅▇████

0,1
epoch,7
inference_accuracy,0.86389
inference_loss,0.7545
test_acc,0.875
test_f1,0.86425
test_loss,0.77321
test_precision,0.86692
test_recall,0.86667
train_acc,0.86389
train_f1,0.8633


wandb: Agent Starting Run: eg3tjpd3 with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00022279951979117932
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.0154 | Train Acc: 0.3472 | Train F1: 0.3431 | Test Loss: 1.6403 | Test Acc: 0.5707 | Test F1: 0.5508
Epoch 2/7 | Train Loss: 1.4063 | Train Acc: 0.7167 | Train F1: 0.7103 | Test Loss: 1.2166 | Test Acc: 0.7772 | Test F1: 0.7674
Epoch 3/7 | Train Loss: 1.1064 | Train Acc: 0.8000 | Train F1: 0.7955 | Test Loss: 1.0238 | Test Acc: 0.8315 | Test F1: 0.8286
Epoch 4/7 | Train Loss: 0.9532 | Train Acc: 0.8222 | Train F1: 0.8191 | Test Loss: 0.9347 | Test Acc: 0.8424 | Test F1: 0.8445
Epoch 5/7 | Train Loss: 0.8810 | Train Acc: 0.8389 | Train F1: 0.8364 | Test Loss: 0.8874 | Test Acc: 0.8641 | Test F1: 0.8598
Epoch 6/7 | Train Loss: 0.8579 | Train Acc: 0.8444 | Train F1: 0.8422 | Test Loss: 0.8760 | Test Acc: 0.8641 | Test F1: 0.8598
Epoch 7/7 | Train Loss: 0.8579 | Train Acc: 0.8444 | Train F1: 0.8422 | Test Loss: 0.8777 | Test Acc: 0.8641 | Test F1: 0.8598
Train time on cuda: 333.60435359999974
Inference Loss: 0.8579, Accuracy: 84.44%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇▇███
test_f1,▁▆▇████
test_loss,█▄▂▂▁▁▁
test_precision,▁▄▇████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.84444
inference_loss,0.85785
test_acc,0.86413
test_f1,0.85978
test_loss,0.87773
test_precision,0.87204
test_recall,0.86111
train_acc,0.84444
train_f1,0.84218


wandb: Agent Starting Run: 4vbro3lj with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000915153853097595
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.0666 | Train Acc: 0.6653 | Train F1: 0.6647 | Test Loss: 0.4867 | Test Acc: 0.8833 | Test F1: 0.8802
Epoch 2/7 | Train Loss: 0.3841 | Train Acc: 0.9042 | Train F1: 0.9041 | Test Loss: 0.3213 | Test Acc: 0.9333 | Test F1: 0.9328
Epoch 3/7 | Train Loss: 0.2878 | Train Acc: 0.9306 | Train F1: 0.9311 | Test Loss: 0.2660 | Test Acc: 0.9500 | Test F1: 0.9505
Epoch 4/7 | Train Loss: 0.2389 | Train Acc: 0.9486 | Train F1: 0.9485 | Test Loss: 0.2362 | Test Acc: 0.9667 | Test F1: 0.9669
Epoch 5/7 | Train Loss: 0.2116 | Train Acc: 0.9583 | Train F1: 0.9583 | Test Loss: 0.2332 | Test Acc: 0.9667 | Test F1: 0.9669
Epoch 6/7 | Train Loss: 0.1994 | Train Acc: 0.9639 | Train F1: 0.9640 | Test Loss: 0.2332 | Test Acc: 0.9667 | Test F1: 0.9669
Epoch 7/7 | Train Loss: 0.1994 | Train Acc: 0.9639 | Train F1: 0.9640 | Test Loss: 0.2332 | Test Acc: 0.9667 | Test F1: 0.9669
Train time on cuda: 330.51596869999776
Inference Loss: 0.1994, Accuracy: 96.39%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▃▂▁▁▁▁
test_precision,▁▅▇████
test_recall,▁▅▇████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.96389
inference_loss,0.19942
test_acc,0.96667
test_f1,0.96692
test_loss,0.23322
test_precision,0.97135
test_recall,0.96667
train_acc,0.96389
train_f1,0.96397


wandb: Agent Starting Run: f2pe4m6r with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00017248641280476223
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7276 | Train Acc: 0.4861 | Train F1: 0.4868 | Test Loss: 1.3518 | Test Acc: 0.7444 | Test F1: 0.7390
Epoch 2/7 | Train Loss: 1.1193 | Train Acc: 0.7972 | Train F1: 0.7954 | Test Loss: 0.9867 | Test Acc: 0.8278 | Test F1: 0.8258
Epoch 3/7 | Train Loss: 0.8715 | Train Acc: 0.8458 | Train F1: 0.8425 | Test Loss: 0.8238 | Test Acc: 0.8500 | Test F1: 0.8512
Epoch 4/7 | Train Loss: 0.7546 | Train Acc: 0.8806 | Train F1: 0.8806 | Test Loss: 0.7431 | Test Acc: 0.8722 | Test F1: 0.8721
Epoch 5/7 | Train Loss: 0.6956 | Train Acc: 0.8931 | Train F1: 0.8924 | Test Loss: 0.7077 | Test Acc: 0.8722 | Test F1: 0.8730
Epoch 6/7 | Train Loss: 0.6756 | Train Acc: 0.9000 | Train F1: 0.8996 | Test Loss: 0.7077 | Test Acc: 0.8722 | Test F1: 0.8730
Epoch 7/7 | Train Loss: 0.6756 | Train Acc: 0.9000 | Train F1: 0.8996 | Test Loss: 0.7077 | Test Acc: 0.8722 | Test F1: 0.8730
Train time on cuda: 330.74654250000094
Inference Loss: 0.6757, Accuracy: 90.00%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇████
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.9
inference_loss,0.67567
test_acc,0.87222
test_f1,0.87304
test_loss,0.70765
test_precision,0.88582
test_recall,0.87222
train_acc,0.9
train_f1,0.89957


wandb: Agent Starting Run: 9m06gi2c with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009174072384702496
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.3435 | Train Acc: 0.6042 | Train F1: 0.6007 | Test Loss: 0.7032 | Test Acc: 0.8370 | Test F1: 0.8127
Epoch 2/7 | Train Loss: 0.5421 | Train Acc: 0.8583 | Train F1: 0.8568 | Test Loss: 0.4502 | Test Acc: 0.8641 | Test F1: 0.8714
Epoch 3/7 | Train Loss: 0.3923 | Train Acc: 0.9097 | Train F1: 0.9106 | Test Loss: 0.3399 | Test Acc: 0.9457 | Test F1: 0.9456
Epoch 4/7 | Train Loss: 0.3327 | Train Acc: 0.9306 | Train F1: 0.9310 | Test Loss: 0.3038 | Test Acc: 0.9620 | Test F1: 0.9619
Epoch 5/7 | Train Loss: 0.3043 | Train Acc: 0.9417 | Train F1: 0.9418 | Test Loss: 0.3012 | Test Acc: 0.9565 | Test F1: 0.9560
Epoch 6/7 | Train Loss: 0.2911 | Train Acc: 0.9472 | Train F1: 0.9475 | Test Loss: 0.2985 | Test Acc: 0.9565 | Test F1: 0.9560
Epoch 7/7 | Train Loss: 0.2911 | Train Acc: 0.9472 | Train F1: 0.9475 | Test Loss: 0.2972 | Test Acc: 0.9565 | Test F1: 0.9560
Train time on cuda: 333.36595859999943
Inference Loss: 0.2911, Accuracy: 94.72%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▃▇████
test_f1,▁▄▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▃▇████
test_recall,▁▃▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.94722
inference_loss,0.29114
test_acc,0.95652
test_f1,0.95601
test_loss,0.2972
test_precision,0.96557
test_recall,0.95556
train_acc,0.94722
train_f1,0.94752


wandb: Agent Starting Run: li8vtlxb with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000287435434823635
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7677 | Train Acc: 0.4764 | Train F1: 0.4645 | Test Loss: 1.3767 | Test Acc: 0.6576 | Test F1: 0.6500
Epoch 2/7 | Train Loss: 1.1437 | Train Acc: 0.7861 | Train F1: 0.7843 | Test Loss: 1.0075 | Test Acc: 0.7935 | Test F1: 0.7951
Epoch 3/7 | Train Loss: 0.8868 | Train Acc: 0.8250 | Train F1: 0.8202 | Test Loss: 0.8284 | Test Acc: 0.8641 | Test F1: 0.8521
Epoch 4/7 | Train Loss: 0.7623 | Train Acc: 0.8583 | Train F1: 0.8565 | Test Loss: 0.7495 | Test Acc: 0.8804 | Test F1: 0.8770
Epoch 5/7 | Train Loss: 0.7020 | Train Acc: 0.8764 | Train F1: 0.8753 | Test Loss: 0.7117 | Test Acc: 0.8804 | Test F1: 0.8765
Epoch 6/7 | Train Loss: 0.6830 | Train Acc: 0.8806 | Train F1: 0.8791 | Test Loss: 0.7160 | Test Acc: 0.8804 | Test F1: 0.8765
Epoch 7/7 | Train Loss: 0.6830 | Train Acc: 0.8806 | Train F1: 0.8791 | Test Loss: 0.7130 | Test Acc: 0.8750 | Test F1: 0.8765
Train time on cuda: 333.46976949999953
Inference Loss: 0.6830, Accuracy: 88.06%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▇████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.88056
inference_loss,0.68303
test_acc,0.875
test_f1,0.87649
test_loss,0.713
test_precision,0.88648
test_recall,0.87778
train_acc,0.88056
train_f1,0.87908


wandb: Agent Starting Run: vgammxy5 with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007220659918459098
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6849 | Train Acc: 0.4944 | Train F1: 0.4939 | Test Loss: 1.0931 | Test Acc: 0.7812 | Test F1: 0.7521
Epoch 2/7 | Train Loss: 0.9233 | Train Acc: 0.7917 | Train F1: 0.7844 | Test Loss: 0.7448 | Test Acc: 0.8698 | Test F1: 0.8511
Epoch 3/7 | Train Loss: 0.6572 | Train Acc: 0.8639 | Train F1: 0.8628 | Test Loss: 0.5999 | Test Acc: 0.9062 | Test F1: 0.8983
Epoch 4/7 | Train Loss: 0.5503 | Train Acc: 0.8931 | Train F1: 0.8932 | Test Loss: 0.5191 | Test Acc: 0.9115 | Test F1: 0.9052
Epoch 5/7 | Train Loss: 0.5057 | Train Acc: 0.8944 | Train F1: 0.8953 | Test Loss: 0.4905 | Test Acc: 0.9062 | Test F1: 0.9004
Epoch 6/7 | Train Loss: 0.4919 | Train Acc: 0.9042 | Train F1: 0.9049 | Test Loss: 0.4928 | Test Acc: 0.9062 | Test F1: 0.9004
Epoch 7/7 | Train Loss: 0.4919 | Train Acc: 0.9042 | Train F1: 0.9049 | Test Loss: 0.5259 | Test Acc: 0.8906 | Test F1: 0.9004
Train time on cuda: 334.78454729999794
Inference Loss: 0.4920, Accuracy: 90.28%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆████▇
test_f1,▁▆█████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▆█████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.90278
inference_loss,0.492
test_acc,0.89062
test_f1,0.90042
test_loss,0.52588
test_precision,0.91175
test_recall,0.9
train_acc,0.90417
train_f1,0.90489


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: 1q5d4oed with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0002717153052071325
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6457 | Train Acc: 0.5000 | Train F1: 0.4876 | Test Loss: 1.0808 | Test Acc: 0.8222 | Test F1: 0.8205
Epoch 2/7 | Train Loss: 0.9001 | Train Acc: 0.8319 | Train F1: 0.8300 | Test Loss: 0.7450 | Test Acc: 0.8556 | Test F1: 0.8560
Epoch 3/7 | Train Loss: 0.6710 | Train Acc: 0.8653 | Train F1: 0.8648 | Test Loss: 0.6023 | Test Acc: 0.9111 | Test F1: 0.9106
Epoch 4/7 | Train Loss: 0.5766 | Train Acc: 0.8819 | Train F1: 0.8808 | Test Loss: 0.5405 | Test Acc: 0.9056 | Test F1: 0.9073
Epoch 5/7 | Train Loss: 0.5264 | Train Acc: 0.9028 | Train F1: 0.9036 | Test Loss: 0.5092 | Test Acc: 0.9111 | Test F1: 0.9127
Epoch 6/7 | Train Loss: 0.5091 | Train Acc: 0.9111 | Train F1: 0.9117 | Test Loss: 0.5092 | Test Acc: 0.9111 | Test F1: 0.9127
Epoch 7/7 | Train Loss: 0.5091 | Train Acc: 0.9111 | Train F1: 0.9117 | Test Loss: 0.5092 | Test Acc: 0.9111 | Test F1: 0.9127
Train time on cuda: 330.581757699998
Inference Loss: 0.5091, Accuracy: 91.11%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄█████
test_f1,▁▄█████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅█▇▇▇▇
test_recall,▁▄█████
train_acc,▁▇▇████
train_f1,▁▇▇▇███

0,1
epoch,7
inference_accuracy,0.91111
inference_loss,0.50913
test_acc,0.91111
test_f1,0.91265
test_loss,0.50918
test_precision,0.92436
test_recall,0.91111
train_acc,0.91111
train_f1,0.91169


wandb: Agent Starting Run: e998iolw with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0005897828093536084
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.8969 | Train Acc: 0.4083 | Train F1: 0.3983 | Test Loss: 1.3899 | Test Acc: 0.7188 | Test F1: 0.7267
Epoch 2/7 | Train Loss: 1.1106 | Train Acc: 0.7944 | Train F1: 0.7920 | Test Loss: 0.9092 | Test Acc: 0.8542 | Test F1: 0.8592
Epoch 3/7 | Train Loss: 0.7934 | Train Acc: 0.8486 | Train F1: 0.8461 | Test Loss: 0.7316 | Test Acc: 0.9167 | Test F1: 0.9104
Epoch 4/7 | Train Loss: 0.6602 | Train Acc: 0.8736 | Train F1: 0.8729 | Test Loss: 0.6202 | Test Acc: 0.9219 | Test F1: 0.9167
Epoch 5/7 | Train Loss: 0.6039 | Train Acc: 0.8819 | Train F1: 0.8824 | Test Loss: 0.6182 | Test Acc: 0.9167 | Test F1: 0.9286
Epoch 6/7 | Train Loss: 0.5858 | Train Acc: 0.8875 | Train F1: 0.8885 | Test Loss: 0.5933 | Test Acc: 0.9323 | Test F1: 0.9286
Epoch 7/7 | Train Loss: 0.5858 | Train Acc: 0.8875 | Train F1: 0.8885 | Test Loss: 0.6166 | Test Acc: 0.9167 | Test F1: 0.9286
Train time on cuda: 334.45998060000056
Inference Loss: 0.5859, Accuracy: 88.75%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇█▇█▇
test_f1,▁▆▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▇▇███
test_recall,▁▆▇████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.8875
inference_loss,0.58588
test_acc,0.91667
test_f1,0.92859
test_loss,0.61661
test_precision,0.93628
test_recall,0.92778
train_acc,0.8875
train_f1,0.8885


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: savyawid with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009396424068982048
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.2148 | Train Acc: 0.6444 | Train F1: 0.6384 | Test Loss: 0.6283 | Test Acc: 0.8913 | Test F1: 0.8879
Epoch 2/7 | Train Loss: 0.4931 | Train Acc: 0.8806 | Train F1: 0.8799 | Test Loss: 0.3927 | Test Acc: 0.8967 | Test F1: 0.8937
Epoch 3/7 | Train Loss: 0.3574 | Train Acc: 0.9139 | Train F1: 0.9142 | Test Loss: 0.3321 | Test Acc: 0.9511 | Test F1: 0.9515
Epoch 4/7 | Train Loss: 0.3049 | Train Acc: 0.9389 | Train F1: 0.9392 | Test Loss: 0.2912 | Test Acc: 0.9457 | Test F1: 0.9456
Epoch 5/7 | Train Loss: 0.2777 | Train Acc: 0.9444 | Train F1: 0.9449 | Test Loss: 0.2937 | Test Acc: 0.9402 | Test F1: 0.9456
Epoch 6/7 | Train Loss: 0.2676 | Train Acc: 0.9500 | Train F1: 0.9504 | Test Loss: 0.2807 | Test Acc: 0.9457 | Test F1: 0.9456
Epoch 7/7 | Train Loss: 0.2676 | Train Acc: 0.9500 | Train F1: 0.9504 | Test Loss: 0.2879 | Test Acc: 0.9457 | Test F1: 0.9456
Train time on cuda: 333.0815440000006
Inference Loss: 0.2676, Accuracy: 95.00%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▂█▇▇▇▇
test_f1,▁▂█▇▇▇▇
test_loss,█▃▂▁▁▁▁
test_precision,▂▁█▇▇▇▇
test_recall,▁▂█▇▇▇▇
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.95
inference_loss,0.26759
test_acc,0.94565
test_f1,0.94563
test_loss,0.28789
test_precision,0.9533
test_recall,0.94444
train_acc,0.95
train_f1,0.95037


wandb: Agent Starting Run: qyzbhm45 with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000529553947011024
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.5614 | Train Acc: 0.5444 | Train F1: 0.5553 | Test Loss: 1.0212 | Test Acc: 0.7717 | Test F1: 0.7361
Epoch 2/7 | Train Loss: 0.7759 | Train Acc: 0.8319 | Train F1: 0.8247 | Test Loss: 0.6226 | Test Acc: 0.8696 | Test F1: 0.8727
Epoch 3/7 | Train Loss: 0.5626 | Train Acc: 0.8861 | Train F1: 0.8858 | Test Loss: 0.5165 | Test Acc: 0.9076 | Test F1: 0.9069
Epoch 4/7 | Train Loss: 0.4809 | Train Acc: 0.9028 | Train F1: 0.9032 | Test Loss: 0.4683 | Test Acc: 0.9185 | Test F1: 0.9238
Epoch 5/7 | Train Loss: 0.4424 | Train Acc: 0.9111 | Train F1: 0.9115 | Test Loss: 0.4370 | Test Acc: 0.9293 | Test F1: 0.9290
Epoch 6/7 | Train Loss: 0.4294 | Train Acc: 0.9139 | Train F1: 0.9144 | Test Loss: 0.4435 | Test Acc: 0.9239 | Test F1: 0.9290
Epoch 7/7 | Train Loss: 0.4294 | Train Acc: 0.9139 | Train F1: 0.9144 | Test Loss: 0.4344 | Test Acc: 0.9293 | Test F1: 0.9290
Train time on cuda: 335.0206175000021
Inference Loss: 0.4295, Accuracy: 91.39%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▆▇████
test_loss,█▃▂▁▁▁▁
test_precision,▁▅▇████
test_recall,▁▅▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.91389
inference_loss,0.42949
test_acc,0.92935
test_f1,0.92896
test_loss,0.43442
test_precision,0.94031
test_recall,0.92778
train_acc,0.91389
train_f1,0.9144


wandb: Agent Starting Run: 0hjt9w2a with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00014325958257280553
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.0149 | Train Acc: 0.2819 | Train F1: 0.2945 | Test Loss: 1.5977 | Test Acc: 0.6000 | Test F1: 0.5746
Epoch 2/7 | Train Loss: 1.3760 | Train Acc: 0.7167 | Train F1: 0.7125 | Test Loss: 1.1718 | Test Acc: 0.8389 | Test F1: 0.8372
Epoch 3/7 | Train Loss: 1.0700 | Train Acc: 0.8278 | Train F1: 0.8279 | Test Loss: 0.9805 | Test Acc: 0.8556 | Test F1: 0.8521
Epoch 4/7 | Train Loss: 0.9213 | Train Acc: 0.8431 | Train F1: 0.8412 | Test Loss: 0.8800 | Test Acc: 0.8556 | Test F1: 0.8535
Epoch 5/7 | Train Loss: 0.8496 | Train Acc: 0.8542 | Train F1: 0.8539 | Test Loss: 0.8376 | Test Acc: 0.8667 | Test F1: 0.8664
Epoch 6/7 | Train Loss: 0.8251 | Train Acc: 0.8556 | Train F1: 0.8553 | Test Loss: 0.8376 | Test Acc: 0.8667 | Test F1: 0.8664
Epoch 7/7 | Train Loss: 0.8251 | Train Acc: 0.8556 | Train F1: 0.8553 | Test Loss: 0.8376 | Test Acc: 0.8667 | Test F1: 0.8664
Train time on cuda: 330.9667788000006
Inference Loss: 0.8251, Accuracy: 85.56%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇█████
test_f1,▁▇█████
test_loss,█▄▂▁▁▁▁
test_precision,▁▇█████
test_recall,▁▇█████
train_acc,▁▆█████
train_f1,▁▆█████

0,1
epoch,7
inference_accuracy,0.85556
inference_loss,0.82515
test_acc,0.86667
test_f1,0.86637
test_loss,0.83759
test_precision,0.88191
test_recall,0.86667
train_acc,0.85556
train_f1,0.8553


wandb: Agent Starting Run: mcqo3ddw with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00015944708176620826
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.0829 | Train Acc: 0.2153 | Train F1: 0.1894 | Test Loss: 1.9075 | Test Acc: 0.3229 | Test F1: 0.2887
Epoch 2/7 | Train Loss: 1.7616 | Train Acc: 0.4722 | Train F1: 0.4371 | Test Loss: 1.7237 | Test Acc: 0.4948 | Test F1: 0.4784
Epoch 3/7 | Train Loss: 1.5785 | Train Acc: 0.5819 | Train F1: 0.5464 | Test Loss: 1.5404 | Test Acc: 0.6198 | Test F1: 0.5843
Epoch 4/7 | Train Loss: 1.4576 | Train Acc: 0.6458 | Train F1: 0.6227 | Test Loss: 1.4395 | Test Acc: 0.6562 | Test F1: 0.6293
Epoch 5/7 | Train Loss: 1.3905 | Train Acc: 0.6750 | Train F1: 0.6582 | Test Loss: 1.4297 | Test Acc: 0.6250 | Test F1: 0.6298
Epoch 6/7 | Train Loss: 1.3680 | Train Acc: 0.6861 | Train F1: 0.6711 | Test Loss: 1.4056 | Test Acc: 0.6562 | Test F1: 0.6298
Epoch 7/7 | Train Loss: 1.3680 | Train Acc: 0.6861 | Train F1: 0.6711 | Test Loss: 1.4033 | Test Acc: 0.6406 | Test F1: 0.6298
Train time on cuda: 334.8881515999965
Inference Loss: 1.3680, Accuracy: 68.61%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇█▇██
test_f1,▁▅▇████
test_loss,█▅▃▂▁▁▁
test_precision,▁▅▇████
test_recall,▁▅▇████
train_acc,▁▅▆▇███
train_f1,▁▅▆▇███

0,1
epoch,7
inference_accuracy,0.68611
inference_loss,1.36804
test_acc,0.64062
test_f1,0.62982
test_loss,1.40333
test_precision,0.63789
test_recall,0.65
train_acc,0.68611
train_f1,0.67111


wandb: Agent Starting Run: 8kij1q0b with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000554367158385364
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.3871 | Train Acc: 0.5792 | Train F1: 0.5804 | Test Loss: 0.6902 | Test Acc: 0.8778 | Test F1: 0.8796
Epoch 2/7 | Train Loss: 0.5585 | Train Acc: 0.8764 | Train F1: 0.8765 | Test Loss: 0.4360 | Test Acc: 0.9222 | Test F1: 0.9227
Epoch 3/7 | Train Loss: 0.4157 | Train Acc: 0.9236 | Train F1: 0.9242 | Test Loss: 0.3566 | Test Acc: 0.9444 | Test F1: 0.9454
Epoch 4/7 | Train Loss: 0.3468 | Train Acc: 0.9319 | Train F1: 0.9326 | Test Loss: 0.3257 | Test Acc: 0.9611 | Test F1: 0.9619
Epoch 5/7 | Train Loss: 0.3159 | Train Acc: 0.9389 | Train F1: 0.9392 | Test Loss: 0.3111 | Test Acc: 0.9611 | Test F1: 0.9619
Epoch 6/7 | Train Loss: 0.3026 | Train Acc: 0.9431 | Train F1: 0.9434 | Test Loss: 0.3111 | Test Acc: 0.9611 | Test F1: 0.9619
Epoch 7/7 | Train Loss: 0.3026 | Train Acc: 0.9431 | Train F1: 0.9434 | Test Loss: 0.3111 | Test Acc: 0.9611 | Test F1: 0.9619
Train time on cuda: 330.80358319999505
Inference Loss: 0.3027, Accuracy: 94.31%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇████
test_f1,▁▅▇████
test_loss,█▃▂▁▁▁▁
test_precision,▁▃▆████
test_recall,▁▅▇████
train_acc,▁▇█████
train_f1,▁▇█████

0,1
epoch,7
inference_accuracy,0.94306
inference_loss,0.30268
test_acc,0.96111
test_f1,0.96191
test_loss,0.31113
test_precision,0.96861
test_recall,0.96111
train_acc,0.94306
train_f1,0.94338


wandb: Agent Starting Run: 81a16oai with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0003497894037364741
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6657 | Train Acc: 0.4903 | Train F1: 0.4823 | Test Loss: 1.0349 | Test Acc: 0.8222 | Test F1: 0.8198
Epoch 2/7 | Train Loss: 0.8179 | Train Acc: 0.8181 | Train F1: 0.8103 | Test Loss: 0.6667 | Test Acc: 0.8944 | Test F1: 0.8969
Epoch 3/7 | Train Loss: 0.5898 | Train Acc: 0.8792 | Train F1: 0.8793 | Test Loss: 0.5450 | Test Acc: 0.9056 | Test F1: 0.9034
Epoch 4/7 | Train Loss: 0.5020 | Train Acc: 0.8917 | Train F1: 0.8923 | Test Loss: 0.4722 | Test Acc: 0.9389 | Test F1: 0.9404
Epoch 5/7 | Train Loss: 0.4580 | Train Acc: 0.9125 | Train F1: 0.9125 | Test Loss: 0.4510 | Test Acc: 0.9389 | Test F1: 0.9404
Epoch 6/7 | Train Loss: 0.4417 | Train Acc: 0.9167 | Train F1: 0.9167 | Test Loss: 0.4510 | Test Acc: 0.9389 | Test F1: 0.9404
Epoch 7/7 | Train Loss: 0.4417 | Train Acc: 0.9167 | Train F1: 0.9167 | Test Loss: 0.4510 | Test Acc: 0.9389 | Test F1: 0.9404
Train time on cuda: 330.78122019999864
Inference Loss: 0.4418, Accuracy: 91.53%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆████
test_f1,▁▅▆████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▆████
test_recall,▁▅▆████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.91528
inference_loss,0.44179
test_acc,0.93889
test_f1,0.94036
test_loss,0.45097
test_precision,0.94678
test_recall,0.93889
train_acc,0.91667
train_f1,0.91672


wandb: Sweep Agent: Waiting for job.
wandb: Job received.
wandb: Agent Starting Run: 14pj0vht with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0009199528939676042
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7230 | Train Acc: 0.4250 | Train F1: 0.4330 | Test Loss: 1.0526 | Test Acc: 0.7240 | Test F1: 0.7061
Epoch 2/7 | Train Loss: 0.8274 | Train Acc: 0.8042 | Train F1: 0.7958 | Test Loss: 0.6323 | Test Acc: 0.8594 | Test F1: 0.8649
Epoch 3/7 | Train Loss: 0.5689 | Train Acc: 0.8764 | Train F1: 0.8768 | Test Loss: 0.4956 | Test Acc: 0.8802 | Test F1: 0.8905
Epoch 4/7 | Train Loss: 0.4787 | Train Acc: 0.8972 | Train F1: 0.8978 | Test Loss: 0.4707 | Test Acc: 0.8750 | Test F1: 0.9012
Epoch 5/7 | Train Loss: 0.4371 | Train Acc: 0.9083 | Train F1: 0.9090 | Test Loss: 0.4313 | Test Acc: 0.8958 | Test F1: 0.9074
Epoch 6/7 | Train Loss: 0.4248 | Train Acc: 0.9181 | Train F1: 0.9188 | Test Loss: 0.4189 | Test Acc: 0.8958 | Test F1: 0.9074
Epoch 7/7 | Train Loss: 0.4248 | Train Acc: 0.9181 | Train F1: 0.9188 | Test Loss: 0.4297 | Test Acc: 0.8958 | Test F1: 0.9074
Train time on cuda: 334.6500118999975
Inference Loss: 0.4248, Accuracy: 91.81%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇▇▇███
test_f1,▁▇▇████
test_loss,█▃▂▂▁▁▁
test_precision,▁▇▇████
test_recall,▁▆▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.91806
inference_loss,0.42478
test_acc,0.89583
test_f1,0.90738
test_loss,0.42971
test_precision,0.91978
test_recall,0.90556
train_acc,0.91806
train_f1,0.91879


wandb: Agent Starting Run: 06xo6ywo with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000543701601379045
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6234 | Train Acc: 0.5014 | Train F1: 0.5042 | Test Loss: 1.0088 | Test Acc: 0.7772 | Test F1: 0.7643
Epoch 2/7 | Train Loss: 0.8059 | Train Acc: 0.8111 | Train F1: 0.8097 | Test Loss: 0.6560 | Test Acc: 0.8587 | Test F1: 0.8492
Epoch 3/7 | Train Loss: 0.5851 | Train Acc: 0.8694 | Train F1: 0.8674 | Test Loss: 0.5366 | Test Acc: 0.9130 | Test F1: 0.9139
Epoch 4/7 | Train Loss: 0.4969 | Train Acc: 0.9000 | Train F1: 0.9010 | Test Loss: 0.4719 | Test Acc: 0.9293 | Test F1: 0.9300
Epoch 5/7 | Train Loss: 0.4567 | Train Acc: 0.8972 | Train F1: 0.8978 | Test Loss: 0.4523 | Test Acc: 0.9402 | Test F1: 0.9398
Epoch 6/7 | Train Loss: 0.4433 | Train Acc: 0.9014 | Train F1: 0.9021 | Test Loss: 0.4537 | Test Acc: 0.9402 | Test F1: 0.9398
Epoch 7/7 | Train Loss: 0.4433 | Train Acc: 0.9014 | Train F1: 0.9021 | Test Loss: 0.4484 | Test Acc: 0.9402 | Test F1: 0.9398
Train time on cuda: 333.32329749999917
Inference Loss: 0.4435, Accuracy: 90.14%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▇████
test_f1,▁▄▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▇████
test_recall,▁▅▇████
train_acc,▁▆▇████
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.90139
inference_loss,0.44345
test_acc,0.94022
test_f1,0.93984
test_loss,0.44836
test_precision,0.94881
test_recall,0.93889
train_acc,0.90139
train_f1,0.90211


wandb: Agent Starting Run: 4rsxei3k with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.000609949013323087
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.1574 | Train Acc: 0.6819 | Train F1: 0.6819 | Test Loss: 0.5820 | Test Acc: 0.8778 | Test F1: 0.8698
Epoch 2/7 | Train Loss: 0.4908 | Train Acc: 0.8931 | Train F1: 0.8933 | Test Loss: 0.3805 | Test Acc: 0.9389 | Test F1: 0.9401
Epoch 3/7 | Train Loss: 0.3681 | Train Acc: 0.9181 | Train F1: 0.9183 | Test Loss: 0.3227 | Test Acc: 0.9444 | Test F1: 0.9453
Epoch 4/7 | Train Loss: 0.3149 | Train Acc: 0.9333 | Train F1: 0.9335 | Test Loss: 0.3107 | Test Acc: 0.9222 | Test F1: 0.9212
Epoch 5/7 | Train Loss: 0.2823 | Train Acc: 0.9472 | Train F1: 0.9472 | Test Loss: 0.2802 | Test Acc: 0.9611 | Test F1: 0.9618
Epoch 6/7 | Train Loss: 0.2699 | Train Acc: 0.9528 | Train F1: 0.9529 | Test Loss: 0.2802 | Test Acc: 0.9611 | Test F1: 0.9618
Epoch 7/7 | Train Loss: 0.2699 | Train Acc: 0.9528 | Train F1: 0.9529 | Test Loss: 0.2802 | Test Acc: 0.9611 | Test F1: 0.9618
Train time on cuda: 330.64611850000074
Inference Loss: 0.2699, Accuracy: 95.28%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▆▇▅███
test_f1,▁▆▇▅███
test_loss,█▃▂▂▁▁▁
test_precision,▁▆▆▄███
test_recall,▁▆▇▅███
train_acc,▁▆▇▇███
train_f1,▁▆▇████

0,1
epoch,7
inference_accuracy,0.95278
inference_loss,0.26992
test_acc,0.96111
test_f1,0.96183
test_loss,0.28025
test_precision,0.96721
test_recall,0.96111
train_acc,0.95278
train_f1,0.95285


wandb: Agent Starting Run: xqkl47gv with config:
wandb: 	batch_size: 4
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0005650421735069941
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.2972 | Train Acc: 0.6097 | Train F1: 0.6119 | Test Loss: 0.6827 | Test Acc: 0.8778 | Test F1: 0.8762
Epoch 2/7 | Train Loss: 0.5364 | Train Acc: 0.8736 | Train F1: 0.8744 | Test Loss: 0.4663 | Test Acc: 0.8944 | Test F1: 0.8932
Epoch 3/7 | Train Loss: 0.3994 | Train Acc: 0.9139 | Train F1: 0.9146 | Test Loss: 0.3959 | Test Acc: 0.9278 | Test F1: 0.9304
Epoch 4/7 | Train Loss: 0.3344 | Train Acc: 0.9306 | Train F1: 0.9310 | Test Loss: 0.3243 | Test Acc: 0.9556 | Test F1: 0.9570
Epoch 5/7 | Train Loss: 0.3059 | Train Acc: 0.9431 | Train F1: 0.9433 | Test Loss: 0.3087 | Test Acc: 0.9556 | Test F1: 0.9570
Epoch 6/7 | Train Loss: 0.2925 | Train Acc: 0.9431 | Train F1: 0.9431 | Test Loss: 0.3087 | Test Acc: 0.9556 | Test F1: 0.9570
Epoch 7/7 | Train Loss: 0.2925 | Train Acc: 0.9431 | Train F1: 0.9431 | Test Loss: 0.3087 | Test Acc: 0.9556 | Test F1: 0.9570
Train time on cuda: 331.256587099997
Inference Loss: 0.2926, Accuracy: 94.31%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▃▅████
test_f1,▁▂▆████
test_loss,█▄▃▁▁▁▁
test_precision,▁▂▇████
test_recall,▁▂▅████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.94306
inference_loss,0.29255
test_acc,0.95556
test_f1,0.95697
test_loss,0.30871
test_precision,0.96389
test_recall,0.95556
train_acc,0.94306
train_f1,0.9431


wandb: Agent Starting Run: nwsj597m with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0004496835276717262
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6664 | Train Acc: 0.5028 | Train F1: 0.4885 | Test Loss: 1.1481 | Test Acc: 0.7337 | Test F1: 0.7071
Epoch 2/7 | Train Loss: 0.9034 | Train Acc: 0.8292 | Train F1: 0.8253 | Test Loss: 0.7688 | Test Acc: 0.8315 | Test F1: 0.8222
Epoch 3/7 | Train Loss: 0.6616 | Train Acc: 0.8639 | Train F1: 0.8623 | Test Loss: 0.6097 | Test Acc: 0.8804 | Test F1: 0.8751
Epoch 4/7 | Train Loss: 0.5625 | Train Acc: 0.8861 | Train F1: 0.8856 | Test Loss: 0.5516 | Test Acc: 0.8804 | Test F1: 0.8804
Epoch 5/7 | Train Loss: 0.5170 | Train Acc: 0.8944 | Train F1: 0.8943 | Test Loss: 0.5170 | Test Acc: 0.8967 | Test F1: 0.8937
Epoch 6/7 | Train Loss: 0.5003 | Train Acc: 0.9014 | Train F1: 0.9020 | Test Loss: 0.5169 | Test Acc: 0.8967 | Test F1: 0.8937
Epoch 7/7 | Train Loss: 0.5003 | Train Acc: 0.9014 | Train F1: 0.9020 | Test Loss: 0.5243 | Test Acc: 0.8859 | Test F1: 0.8937
Train time on cuda: 333.3131334999998
Inference Loss: 0.5003, Accuracy: 90.14%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▇▇███
test_f1,▁▅▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▆▇████
test_recall,▁▅▇████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.90139
inference_loss,0.50034
test_acc,0.88587
test_f1,0.89368
test_loss,0.52427
test_precision,0.90518
test_recall,0.89444
train_acc,0.90139
train_f1,0.90203


wandb: Agent Starting Run: d2dhr5qm with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0005874947011749158
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6124 | Train Acc: 0.5000 | Train F1: 0.4983 | Test Loss: 1.0056 | Test Acc: 0.7609 | Test F1: 0.7562
Epoch 2/7 | Train Loss: 0.7676 | Train Acc: 0.8667 | Train F1: 0.8664 | Test Loss: 0.6086 | Test Acc: 0.9130 | Test F1: 0.9068
Epoch 3/7 | Train Loss: 0.5423 | Train Acc: 0.8972 | Train F1: 0.8962 | Test Loss: 0.4936 | Test Acc: 0.9402 | Test F1: 0.9455
Epoch 4/7 | Train Loss: 0.4590 | Train Acc: 0.9111 | Train F1: 0.9122 | Test Loss: 0.4373 | Test Acc: 0.9457 | Test F1: 0.9452
Epoch 5/7 | Train Loss: 0.4201 | Train Acc: 0.9306 | Train F1: 0.9311 | Test Loss: 0.4142 | Test Acc: 0.9457 | Test F1: 0.9452
Epoch 6/7 | Train Loss: 0.4071 | Train Acc: 0.9319 | Train F1: 0.9325 | Test Loss: 0.4122 | Test Acc: 0.9457 | Test F1: 0.9452
Epoch 7/7 | Train Loss: 0.4071 | Train Acc: 0.9319 | Train F1: 0.9325 | Test Loss: 0.4099 | Test Acc: 0.9457 | Test F1: 0.9452
Train time on cuda: 333.29068859999825
Inference Loss: 0.4071, Accuracy: 93.19%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇█████
test_f1,▁▇█████
test_loss,█▃▂▁▁▁▁
test_precision,▁▆█████
test_recall,▁▇█████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.93194
inference_loss,0.40709
test_acc,0.94565
test_f1,0.94524
test_loss,0.4099
test_precision,0.95226
test_recall,0.94444
train_acc,0.93194
train_f1,0.93246


wandb: Agent Starting Run: 5yfit20x with config:
wandb: 	batch_size: 32
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00035763366071402423
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 2.1611 | Train Acc: 0.2255 | Train F1: 0.1935 | Test Loss: 1.9975 | Test Acc: 0.3104 | Test F1: 0.2852
Epoch 2/7 | Train Loss: 1.8211 | Train Acc: 0.4226 | Train F1: 0.3906 | Test Loss: 1.7358 | Test Acc: 0.4990 | Test F1: 0.4886
Epoch 3/7 | Train Loss: 1.6031 | Train Acc: 0.6046 | Train F1: 0.5883 | Test Loss: 1.5457 | Test Acc: 0.6156 | Test F1: 0.6016
Epoch 4/7 | Train Loss: 1.4582 | Train Acc: 0.6957 | Train F1: 0.6840 | Test Loss: 1.4454 | Test Acc: 0.6958 | Test F1: 0.6960
Epoch 5/7 | Train Loss: 1.3871 | Train Acc: 0.7337 | Train F1: 0.7289 | Test Loss: 1.4000 | Test Acc: 0.7177 | Test F1: 0.7149
Epoch 6/7 | Train Loss: 1.3594 | Train Acc: 0.7514 | Train F1: 0.7447 | Test Loss: 1.3884 | Test Acc: 0.7365 | Test F1: 0.7149
Epoch 7/7 | Train Loss: 1.3551 | Train Acc: 0.7527 | Train F1: 0.7447 | Test Loss: 1.3888 | Test Acc: 0.7240 | Test F1: 0.7149
Train time on cuda: 335.8239262999996
Inference Loss: 1.3578, Accuracy: 75.28%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▄▆▇███
test_f1,▁▄▆████
test_loss,█▅▃▂▁▁▁
test_precision,▁▅▆████
test_recall,▁▄▆████
train_acc,▁▄▆▇███
train_f1,▁▄▆▇███

0,1
epoch,7
inference_accuracy,0.75278
inference_loss,1.35782
test_acc,0.72396
test_f1,0.71491
test_loss,1.38884
test_precision,0.71756
test_recall,0.72222
train_acc,0.75272
train_f1,0.7447


wandb: Agent Starting Run: utorwvrq with config:
wandb: 	batch_size: 8
wandb: 	epochs: 7
wandb: 	learning_rate: 0.00046981054752811706
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.6167 | Train Acc: 0.5153 | Train F1: 0.5149 | Test Loss: 1.0917 | Test Acc: 0.7663 | Test F1: 0.7223
Epoch 2/7 | Train Loss: 0.8422 | Train Acc: 0.8444 | Train F1: 0.8427 | Test Loss: 0.7013 | Test Acc: 0.8859 | Test F1: 0.8857
Epoch 3/7 | Train Loss: 0.6152 | Train Acc: 0.8736 | Train F1: 0.8720 | Test Loss: 0.5622 | Test Acc: 0.9022 | Test F1: 0.8991
Epoch 4/7 | Train Loss: 0.5239 | Train Acc: 0.8958 | Train F1: 0.8954 | Test Loss: 0.5084 | Test Acc: 0.9130 | Test F1: 0.9149
Epoch 5/7 | Train Loss: 0.4804 | Train Acc: 0.9069 | Train F1: 0.9076 | Test Loss: 0.4912 | Test Acc: 0.9076 | Test F1: 0.9159
Epoch 6/7 | Train Loss: 0.4661 | Train Acc: 0.9139 | Train F1: 0.9144 | Test Loss: 0.4869 | Test Acc: 0.9130 | Test F1: 0.9159
Epoch 7/7 | Train Loss: 0.4661 | Train Acc: 0.9139 | Train F1: 0.9144 | Test Loss: 0.4753 | Test Acc: 0.9185 | Test F1: 0.9159
Train time on cuda: 333.2827094000022
Inference Loss: 0.4662, Accuracy: 91.39%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▇▇█▇██
test_f1,▁▇▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▇▇████
test_recall,▁▇▇████
train_acc,▁▇▇████
train_f1,▁▇▇████

0,1
epoch,7
inference_accuracy,0.91389
inference_loss,0.46619
test_acc,0.91848
test_f1,0.91593
test_loss,0.47529
test_precision,0.91754
test_recall,0.91667
train_acc,0.91389
train_f1,0.91439


wandb: Agent Starting Run: 1lrf7jgi with config:
wandb: 	batch_size: 16
wandb: 	epochs: 7
wandb: 	learning_rate: 0.0007086596902234274
wandb: 	optimizer: adamW
wandb: 	scheduler: PolynomialLR


ASTForAudioClassification(
  (audio_spectrogram_transformer): ASTModel(
    (embeddings): ASTEmbeddings(
      (patch_embeddings): ASTPatchEmbeddings(
        (projection): Conv2d(1, 768, kernel_size=(16, 16), stride=(10, 10))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ASTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ASTLayer(
          (attention): ASTSdpaAttention(
            (attention): ASTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ASTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ASTIntermediate(
       

  0%|          | 0/7 [00:00<?, ?it/s]

Epoch 1/7 | Train Loss: 1.7158 | Train Acc: 0.4625 | Train F1: 0.4613 | Test Loss: 1.1552 | Test Acc: 0.7448 | Test F1: 0.7258
Epoch 2/7 | Train Loss: 0.9615 | Train Acc: 0.7847 | Train F1: 0.7756 | Test Loss: 0.7968 | Test Acc: 0.8333 | Test F1: 0.8335
Epoch 3/7 | Train Loss: 0.6915 | Train Acc: 0.8431 | Train F1: 0.8411 | Test Loss: 0.6362 | Test Acc: 0.8646 | Test F1: 0.8699
Epoch 4/7 | Train Loss: 0.5846 | Train Acc: 0.8681 | Train F1: 0.8670 | Test Loss: 0.5556 | Test Acc: 0.8906 | Test F1: 0.8981
Epoch 5/7 | Train Loss: 0.5351 | Train Acc: 0.8861 | Train F1: 0.8866 | Test Loss: 0.5135 | Test Acc: 0.9010 | Test F1: 0.8933
Epoch 6/7 | Train Loss: 0.5186 | Train Acc: 0.8972 | Train F1: 0.8983 | Test Loss: 0.5470 | Test Acc: 0.8698 | Test F1: 0.8933
Epoch 7/7 | Train Loss: 0.5186 | Train Acc: 0.8972 | Train F1: 0.8983 | Test Loss: 0.5264 | Test Acc: 0.9010 | Test F1: 0.8933
Train time on cuda: 334.74230759999773
Inference Loss: 0.5186, Accuracy: 89.72%


VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▂▃▅▆▇█
inference_accuracy,▁
inference_loss,▁
test_acc,▁▅▆██▇█
test_f1,▁▅▇████
test_loss,█▄▂▁▁▁▁
test_precision,▁▅▆████
test_recall,▁▅▇████
train_acc,▁▆▇████
train_f1,▁▆▇▇███

0,1
epoch,7
inference_accuracy,0.89722
inference_loss,0.51858
test_acc,0.90104
test_f1,0.89329
test_loss,0.52637
test_precision,0.90506
test_recall,0.89444
train_acc,0.89722
train_f1,0.89828


In [8]:
# inference_loop(model=model,
#                device=device,
#                loss_fn=loss_fn,
#                inference_loader= inference_dataloader_custom)



# if not multiple_runs and wandb_init:
#     wandb.finish()

In [9]:
# if SAVE_MODEL:
#     save_model(model=model,
#             target_dir="saved_models",
#             model_name="AST_classifier_true.pt")